In [1]:
from sagemaker.workflow.steps import TrainingStep
from sagemaker.workflow.pipeline import Pipeline
from sagemaker import Session, image_uris


In [2]:
import os

# Base directory used inside the processing container (as the constant's name indicates).
PROCESSING_CONTAINER_DIR = "/opt/ml/processing"
# Local directory with the processing code. Resolved against the kernel's working
# directory instead of a user-specific absolute path, so the notebook is portable
# across machines; it must be run from the directory that contains `processing_code`.
PREPROCESSING_COMPONENT_SOURCE_DIR = os.path.abspath("processing_code")

In [3]:
# SageMaker session reused by the estimator and the pipeline below; the named
# bucket is passed as the session's default bucket.
default_bucket_name = 'kris-mlops-utilities-test'
sm_session = Session(default_bucket=default_bucket_name)


In [4]:
# Look up the container image URI for the XGBoost framework with the
# region / version / instance type pinned below.
xgboost_image_spec = {
    "framework": "xgboost",
    "region": "us-east-1",
    "version": "1.0-1",
    "py_version": "py3",
    "instance_type": "ml.m5.large",
}
image_uri = image_uris.retrieve(**xgboost_image_spec)

In [5]:
from sagemaker.estimator import Estimator

# Estimator for the training step: runs the image resolved in `image_uri`
# on a single ml.m5.large instance, using the shared `sm_session`.
xgb_train = Estimator(
    image_uri=image_uri,
    instance_type="ml.m5.large",
    instance_count=1,
    # Plain string literal: the original f-string had no placeholders.
    base_job_name="notebook-train",
    sagemaker_session=sm_session,
    role="arn:aws:iam::311638508164:role/AmazonSageMaker-ExecutionRole",
)

# NOTE(review): "reg:linear" and "silent" look like deprecated XGBoost parameter
# names (newer releases use "reg:squarederror" / "verbosity") — confirm the
# container version selected above still accepts them before upgrading.
xgb_train.set_hyperparameters(
    objective="reg:linear",
    num_round=50,
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.7,
    silent=0,
)

In [6]:
from sagemaker import TrainingInput

# Input channels for the training job; both are declared as CSV.
train_channel = TrainingInput(
    s3_data="s3://kris-mlops-utilities-test/abalone_data/train",
    content_type="text/csv",
)
# NOTE(review): the "validation" channel reads from the `test` prefix — confirm this is intended.
validation_channel = TrainingInput(
    s3_data="s3://kris-mlops-utilities-test/abalone_data/test",
    content_type="text/csv",
)

# Pipeline step that trains with the `xgb_train` estimator on the two channels above.
notebook_training = TrainingStep(
    name="TrainModel",
    estimator=xgb_train,
    inputs={"train": train_channel, "validation": validation_channel},
)

In [7]:

# Pipeline made of the single training step, with no pipeline parameters,
# bound to the shared SageMaker session.
pipeline_steps = [notebook_training]
pipeline = Pipeline(
    name='training-notebook-pipeline',
    parameters=[],
    steps=pipeline_steps,
    sagemaker_session=sm_session,
)


In [8]:
# Upsert the pipeline definition under the given execution role; the call's
# response (including the pipeline ARN) is shown as the cell output.
pipeline_role_arn = 'arn:aws:iam::311638508164:role/AmazonSageMaker-ExecutionRole'
pipeline.upsert(role_arn=pipeline_role_arn)

{'PipelineArn': 'arn:aws:sagemaker:us-east-1:311638508164:pipeline/training-notebook-pipeline',
 'ResponseMetadata': {'RequestId': 'd9406cb9-7302-4a2a-9748-f4fac5ae6845',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'd9406cb9-7302-4a2a-9748-f4fac5ae6845',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '94',
   'date': 'Fri, 10 Feb 2023 14:01:39 GMT'},
  'RetryAttempts': 0}}

In [9]:
from actions import run_pipeline

# Start an execution of the pipeline upserted above, with no parameter overrides.
run_pipeline(
    pipeline_name='training-notebook-pipeline',
    execution_name_prefix='train',
    pipeline_params={},
)

{'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-1:311638508164:pipeline/training-notebook-pipeline/execution/kd97j0rnhzol',
 'ResponseMetadata': {'RequestId': '49506d08-3b16-4e48-b827-868fe66467d5',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '49506d08-3b16-4e48-b827-868fe66467d5',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '126',
   'date': 'Fri, 10 Feb 2023 14:01:44 GMT'},
  'RetryAttempts': 0}}

In [6]:
# First attempt at upserting a notebook-based pipeline through the project helper.
# NOTE(review): this call failed when executed — the captured output below is
# "ValueError: Either step_args or estimator need to be given." Later cells call
# the helper with a different set of keyword arguments (no `step_name` /
# `config_type`), so this cell appears to target an older signature — confirm
# and remove or update it.
from actions import upsert_notebook_pipeline
upsert_notebook_pipeline(pipeline_name='test-upsert', step_name='training_step', notebook_path='/Users/knikitiuk/workspace/mlops-utilities/mlops_utilities/processing_code/', config_type='', image_uri=image_uri)

ValueError: Either step_args or estimator need to be given.

In [None]:
# Scratch notes: parameters of `upsert_notebook_pipeline` (presumably copied from its signature):
#   pipeline_name: str,
#   step_name: str,
#   notebook_path: str,
#   config_type: str,
#   pipeline_tags: Optional[Dict[str, str]] = None,
#   image_uri: Optional[str] = None,
#   is_training: bool = False,
#   dryrun: bool = False,

In [8]:
from actions import upsert_notebook_pipeline

# Upsert the 'test-upsert' pipeline from the processing-code directory,
# reusing the container image resolved earlier in the notebook.
upsert_notebook_pipeline(
    pipeline_name='test-upsert',
    notebook_path='/Users/knikitiuk/workspace/mlops-utilities/mlops_utilities/processing_code/',
    config_type='',
    image_uri=image_uri,
)

In [9]:
from actions import run_pipeline

# Start an execution of the 'test-upsert' pipeline with no parameter overrides.
run_pipeline(
    pipeline_name='test-upsert',
    execution_name_prefix='test',
    pipeline_params={},
)

{'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-1:311638508164:pipeline/test-upsert/execution/nqid8tg82rhm',
 'ResponseMetadata': {'RequestId': 'dc07b084-26e4-4917-ab4d-62ed4bf76ae6',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'dc07b084-26e4-4917-ab4d-62ed4bf76ae6',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '111',
   'date': 'Mon, 13 Feb 2023 12:31:30 GMT'},
  'RetryAttempts': 0}}

In [None]:
# Scratch note: parameters of `create_estimator`: sm_session: Session, image_uri, role: str, nb_config_path: str, hyperparams_file: str

In [5]:
from notebook_helper import create_estimator

# Inputs for the project helper: execution role, the pipeline defaults YAML and
# the hyperparameters JSON file.
estimator_role = "arn:aws:iam::311638508164:role/AmazonSageMaker-ExecutionRole"
defaults_yml_path = "/Users/knikitiuk/workspace/mlops-utilities/mlops_utilities/processing_code/training_pipeline.defaults.yml"
hyperparams_json_path = "/Users/knikitiuk/workspace/mlops-utilities/mlops_utilities/processing_code/hyperparams.json"

# Build an estimator through the helper; the returned object is the cell output.
create_estimator(
    sm_session=sm_session,
    image_uri=image_uri,
    role=estimator_role,
    nb_config_path=defaults_yml_path,
    hyperparams_file=hyperparams_json_path,
)

<sagemaker.estimator.Estimator at 0x12f601130>

In [None]:
# Scratch notes: parameters of `upsert_notebook_pipeline` (presumably the current signature):
#   pipeline_name: str,
#   notebook_path: str,
#   role: str,
#   nb_yml_config: str,
#   pipeline_tags: Optional[Dict[str, str]] = None,
#   image_uri: Optional[str] = None,
#   dryrun: bool = False,

In [1]:
# IAM role ARN passed as `role` to the pipeline helper in the next cell.
role = "arn:aws:iam::311638508164:role/AmazonSageMaker-ExecutionRole"
# Pipeline defaults YAML, given relative to the kernel's working directory.
nb_config_path = 'processing_code/training_pipeline.defaults.yml'

In [2]:
from actions import upsert_notebook_pipeline

# Upsert the 'nb-test' pipeline using the role and YAML config defined in the
# previous cell; tags and image URI are left unset and dry-run is off.
upsert_notebook_pipeline(
    pipeline_name='nb-test',
    notebook_path='/Users/knikitiuk/workspace/mlops-utilities/mlops_utilities/processing_code',
    role=role,
    nb_yml_config=nb_config_path,
    pipeline_tags=None,
    image_uri=None,
    dryrun=False,
)

In [8]:
from omegaconf import OmegaConf

# Load the pipeline defaults YAML and display the parsed config as the cell output.
defaults_yml_path = 'processing_code/training_pipeline.defaults.yml'
OmegaConf.load(defaults_yml_path)

{'pipeline': {'default_bucket': '???', 'role': '???', 'cache_config': {'enable_caching': True, 'expire_after': 'p1d'}, 'model_package_group_name': '???'}, 'processing': {'instance_count': 1, 'instance_type': 'ml.t3.medium', 'role': '${pipeline.role}'}, 'training': {'instance_count': 1, 'instance_type': 'ml.m5.large', 'role': '${pipeline.role}'}}

In [7]:
# Debug aid: list the kernel's working directory to check which files the relative paths resolve against.
!ls

__init__.py         helpers.py          processing_code     test_training.ipynb
actions.py          notebook_helper.py  test.ipynb
