In [25]:
import gc
import time
from datetime import datetime

import sagemaker
from sagemaker import image_uris
from sagemaker.estimator import Estimator
from sagemaker.experiments.run import Run, load_run
from sagemaker.inputs import TrainingInput
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput

# Display the installed SageMaker SDK version so the environment that produced
# the outputs below is recorded alongside them.
sagemaker.__version__

'2.219.0'

In [26]:
# Dataset and model revisions; both are embedded in the S3 prefixes below.
data_version = "v1"
model_version = "v1"

# One SageMaker session for the notebook, plus its `sagemaker_client` attribute.
session = sagemaker.Session()
sm = session.sagemaker_client

# Experiment tracking: the run name carries the model version and a
# second-resolution timestamp taken when this cell executes.
experiment_name = "team1-index-predictor-model-training"
trial_suffix = f"{datetime.now():%Y-%m-%d-%H-%M-%S}"
trial_name = "-".join(("model-training", model_version, trial_suffix))

# S3 layout: processed inputs and model artifacts live in the same bucket.
bucket_name = "team1-index-predictor-bucket"
input_prefix = "data/processed/" + data_version
output_bucket_prefix = "models/" + model_version

# Processing-job sizing (not referenced by the cells visible in this notebook).
processing_instance_type, processing_instance_count = "ml.t3.medium", 1

# Feature group whose name is handed to the training script as a hyperparameter.
feature_group_name = "index-predictor-feature-group-v7"

In [27]:
# Train the index-predictor model as a SageMaker training job and record it
# under an Experiments run.

# Built-in XGBoost container image for the session's region.
xgboost_container = image_uris.retrieve("xgboost", session.boto_region_name, version="1.3-1")

estimator = Estimator(
    image_uri=xgboost_container,
    role=sagemaker.get_execution_role(),
    instance_count=1,
    instance_type='ml.m5.large',
    # Artifacts land under s3://<bucket>/models/<model_version>/<job-name>/output/.
    output_path=f"s3://{bucket_name}/{output_bucket_prefix}",
    # Reuse the notebook-wide session instead of constructing a second one, so
    # the estimator and the experiment run share the same client configuration.
    sagemaker_session=session,
    entry_point='../../src/models/trainer.py',
)

# These values are forwarded to the entry-point script as hyperparameters;
# their exact meaning is defined by trainer.py, which is not part of this notebook.
estimator.set_hyperparameters(
    mode="feature_store",
    # Location of the `dataset_sizes` input channel inside the training container.
    dataset_sizes_path="/opt/ml/input/data/dataset_sizes/dataset_sizes.json",
    data_version=data_version,
    target_column='close_target',
    columns_to_drop='write_time,api_invocation_time,is_deleted,datetime,type,version',
    model_output_path="/opt/ml/model/model",
    num_trials=10,
    feature_group_name=feature_group_name,
    bucket_name=bucket_name,
    region=session.boto_region_name
)

# fit() must be called while the Run context is still open: the SDK attaches
# the active run's experiment config to jobs started inside the `with` block.
# Previously the context was closed before fit() and the captured config was
# never passed on, so the training job was not linked to the run.
with Run(
    experiment_name=experiment_name,
    run_name=trial_name,
    run_display_name=trial_name,
    sagemaker_session=session,
) as run:
    # Kept as a notebook-level name in case later cells want to reuse it.
    experiment_config = run.experiment_config

    # The channel name `dataset_sizes` matches the path in `dataset_sizes_path`.
    estimator.fit({'dataset_sizes': f"s3://{bucket_name}/{input_prefix}/dataset_sizes.json"})

INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2024-06-21-20-19-27-449


2024-06-21 20:19:28 Starting - Starting the training job...
2024-06-21 20:19:46 Starting - Preparing the instances for training...
2024-06-21 20:20:14 Downloading - Downloading input data......
2024-06-21 20:20:59 Downloading - Downloading the training image..
2024-06-21 20:21:40 Training - Training image download completed. Training in progress.[34m[2024-06-21 20:21:43.370 ip-10-0-159-213.eu-central-1.compute.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2024-06-21 20:21:43.402 ip-10-0-159-213.eu-central-1.compute.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2024-06-21:20:21:43:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2024-06-21:20:21:43:INFO] Failed to parse hyperparameter bucket_name value team1-index-predictor-bucket to Json.[0m
[34mReturning the value itself[0m
[34m[2024-06-21:20:21:43:INFO] Failed to parse hyperparameter columns_to_drop value write_time,api_invocation_time,is_

In [29]:
!aws s3 ls s3://{bucket_name}/{output_bucket_prefix}/sagemaker-xgboost-2024-06-21-20-19-27-449/output/

2024-06-21 20:24:05      31602 model.tar.gz


In [54]:
# Run a full garbage-collection pass; the returned count of unreachable
# objects is shown as the cell output. `gc` is imported with the other
# modules in the import cell at the top of the notebook.
gc.collect()

24255

In [11]:
!aws s3 cp s3://{bucket_name}/{output_bucket_prefix}/sagemaker-xgboost-2024-06-21-20-19-27-449/output/model.tar.gz model.tar.gz

download: s3://team1-index-predictor-bucket/models/model.xgb/sagemaker-xgboost-2024-06-18-23-40-03-184/output/model.tar.gz to ./model.tar.gz
