In [1]:
import boto3
import sagemaker

In [2]:
sagemaker_session = sagemaker.session.Session()
role = sagemaker.get_execution_role()

In [3]:
bucket = sagemaker_session.default_bucket()
code_location = f's3://{bucket}/xgboost/code'
output_path = f's3://{bucket}/xgboost/output'

### Define hyperparameter

In [4]:
hyperparameters = {
       "scale_pos_weight" : "29",    
        "max_depth": "3",
        "eta": "0.2",
        "objective": "binary:logistic",
        "num_round": "100",
}

In [5]:
instance_count = 1
# instance_type = "ml.m5.large"
instance_type = "local"
max_run = 1*60*60

use_spot_instances = False
if use_spot_instances:
    max_wait = 1*60*60
else:
    max_wait = None

In [6]:
if instance_type in ['local', 'local_gpu']:
    from sagemaker.local import LocalSession
    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}
else:
    sagemaker_session = sagemaker.session.Session()

### Define training cluster

In [7]:
from sagemaker.xgboost.estimator import XGBoost

estimator = XGBoost(
    entry_point="xgboost_starter_script.py",
    source_dir='src',
    output_path=output_path,
    code_location=code_location,
    hyperparameters=hyperparameters,
    role=role,
    sagemaker_session=sagemaker_session,
    instance_count=instance_count,
    instance_type=instance_type,
    framework_version="1.3-1",
    max_run=max_run,
    use_spot_instances=use_spot_instances,  # spot instance 활용
    max_wait=max_wait,
)

### Prepare training dataset

In [8]:
data_path=f's3://{bucket}/xgboost/dataset'
!aws s3 sync ./data/dataset/ $data_path

In [9]:
if instance_type in ['local', 'local_gpu']:
    from pathlib import Path
    file_path = f'file://{Path.cwd()}'
    inputs = file_path.split('lab_1_training')[0] + '/data/dataset/'
    
else:
    inputs = data_path
inputs

'file:///home/ec2-user/SageMaker/sg-workshop/data/dataset/'

### Define Experiment

In [10]:
!pip install -U sagemaker-experiments

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p38/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

In [11]:
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from time import strftime

In [12]:
def create_experiment(experiment_name):
    try:
        sm_experiment = Experiment.load(experiment_name)
    except:
        sm_experiment = Experiment.create(experiment_name=experiment_name)
        
def create_trial(experiment_name):
    create_date = strftime("%m%d-%H%M%s")       
    sm_trial = Trial.create(trial_name=f'{experiment_name}-{create_date}',
                            experiment_name=experiment_name)

    job_name = f'{sm_trial.trial_name}'
    return job_name        

### Start training

In [13]:
experiment_name='xgboost-poc-1'

create_experiment(experiment_name)
job_name = create_trial(experiment_name)

estimator.fit(inputs = {'inputdata': inputs},
                  job_name = job_name,
                  experiment_config={
                      'TrialName': job_name,
                      'TrialComponentDisplayName': job_name,
                  },
                  wait=False)

#estimator.fit(inputs = {'inputdata': inputs},
#                  wait=False)

INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: latest.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: xgboost-poc-1-0917-09131663405985
INFO:sagemaker.local.local_session:Starting training job
INFO:sagemaker.local.image:No AWS credentials found in session but credentials from EC2 Metadata Service are available.
INFO:sagemaker.local.image:docker compose file: 
networks:
  sagemaker-local:
    name: sagemaker-local
services:
  algo-1-f5ayf:
    command: train
    container_name: g152ulhlkr-algo-1-f5ayf
    environment:
    - '[Masked]'
    - '[Masked]'
    image: 366743142698.dkr.ecr.ap-northeast-2.amazonaws.com/sagemaker-xgboost:1.3-1
    networks:
      sagemaker-local:
        aliases:
        - algo-1-f5ayf
    stdin_open: true
    tty: true
    volumes:
    - /tmp/tmp8b9rt7vx/algo-1-f5ayf/input:/opt/ml/input
    - /tmp/tmp8b9rt7vx/algo-1-f5ayf/output:/opt/ml/output
    - /tm

Creating g152ulhlkr-algo-1-f5ayf ... 
Creating g152ulhlkr-algo-1-f5ayf ... done
Attaching to g152ulhlkr-algo-1-f5ayf
[36mg152ulhlkr-algo-1-f5ayf |[0m [2022-09-17 09:13:07.800 6c85a99661ca:1 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None
[36mg152ulhlkr-algo-1-f5ayf |[0m [2022-09-17:09:13:07:INFO] Imported framework sagemaker_xgboost_container.training
[36mg152ulhlkr-algo-1-f5ayf |[0m [2022-09-17:09:13:07:INFO] No GPUs detected (normal if no gpus installed)
[36mg152ulhlkr-algo-1-f5ayf |[0m [2022-09-17:09:13:07:INFO] Invoking user training script.
[36mg152ulhlkr-algo-1-f5ayf |[0m [2022-09-17:09:13:07:INFO] Installing module with the following command:
[36mg152ulhlkr-algo-1-f5ayf |[0m /miniconda3/bin/python3 -m pip install . 
[36mg152ulhlkr-algo-1-f5ayf |[0m Processing /opt/ml/code
[36mg152ulhlkr-algo-1-f5ayf |[0m   Preparing metadata (setup.py) ... [?25ldone
[36mg152ulhlkr-algo-1-f5ayf |[0m [?25hBuilding wheels for collected packages: xgboost-starter-script
[36



===== Job Complete =====


In [14]:
estimator.logs()

### Use in SageMaker Studio

In [18]:
from sagemaker.analytics import ExperimentAnalytics
import pandas as pd
pd.options.display.max_columns = 50
pd.options.display.max_rows = 10
pd.options.display.max_colwidth = 100

In [19]:
trial_component_training_analytics = ExperimentAnalytics(
    sagemaker_session= sagemaker_session,
    experiment_name= experiment_name,
    sort_by="metrics.validation:auc.max",        
    sort_order="Descending",
    metric_names=["validation:auc"]
)

trial_component_training_analytics.dataframe()[['Experiments', 'Trials', 'validation:auc - Min', 'validation:auc - Max',
                                                'validation:auc - Avg', 'validation:auc - StdDev', 'validation:auc - Last', 
                                                'eta', 'max_depth', 'num_round', 'scale_pos_weight']]

AttributeError: 'LocalSagemakerClient' object has no attribute 'search'