In [None]:
# !pip install kfp --upgrade
# !which dsl-compile

In [6]:
import json
import time, os

import kfp
from kfp import components
from kfp import dsl
from kfp.aws import use_aws_secret

In [2]:
# All SageMaker component specs are fetched from the same folder on the
# `master` branch of the kubeflow/pipelines repository.
_SAGEMAKER_COMPONENTS_URL = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/aws/sagemaker'


def _load_sagemaker_component(folder):
    """Load the component whose `component.yaml` lives in `folder` under the SageMaker components URL."""
    return components.load_component_from_url(f'{_SAGEMAKER_COMPONENTS_URL}/{folder}/component.yaml')


sagemaker_train_op = _load_sagemaker_component('train')
sagemaker_model_op = _load_sagemaker_component('model')
sagemaker_deploy_op = _load_sagemaker_component('deploy')
sagemaker_batch_transform_op = _load_sagemaker_component('batch_transform')
sagemaker_hpo_op = _load_sagemaker_component('hyperparameter_tuning')

In [3]:
import sagemaker
import boto3

# AWS handles shared by the rest of the notebook: one boto3 session, plus the
# SageMaker SDK session and the low-level SageMaker client built on top of it.
sess = boto3.Session()
sagemaker_session = sagemaker.Session(boto_session=sess)
sm = sess.client('sagemaker')

# IAM role handed to the SageMaker training and model steps of the pipeline.
# If get_execution_role() cannot resolve a role in your environment, comment
# it out and set the ARN explicitly instead:
role = sagemaker.get_execution_role()
# role = 'arn:aws:iam::453691756499:role/service-role/AmazonSageMaker-ExecutionRole-20190820T113591'

In [8]:
local_dataset = 'cifar10'
if not os.listdir(local_dataset):
    !python generate_cifar10_tfrecords.py --data-dir {local_dataset}
    datasets = sagemaker_session.upload_data(path='cifar10', key_prefix='datasets/cifar10-dataset')
else:
    datasets = "s3://sagemaker-us-west-2-453691756499/datasets/cifar10-dataset"

In [9]:
# Default bucket of the SageMaker session and the folder names used under it.
bucket_name = sagemaker_session.default_bucket()
job_folder, dataset_folder = 'jobs', 'datasets'

# One S3 prefix per dataset split, all rooted at `datasets`.
train_path, val_path, eval_path = (
    f'{datasets}/{split}' for split in ('train', 'validation', 'eval')
)

In [10]:
!tar cvfz sourcedir.tar.gz -C code . 
source_s3 = sagemaker_session.upload_data(path='sourcedir.tar.gz', key_prefix='training-scripts')
print(source_s3)

./
./model_def.py
./cifar10-training-sagemaker.py
./inference.py
./requirements.txt
s3://sagemaker-us-west-2-453691756499/training-scripts/sourcedir.tar.gz


In [21]:
def _s3_prefix_channel(channel_name, s3_uri):
    """Build one training input-channel definition backed by an S3 prefix.

    Args:
        channel_name: Value for the channel's "ChannelName" field.
        s3_uri: Value for the channel's "S3Uri" field.

    Returns:
        A JSON-serializable dict describing the channel.
    """
    return {
        "ChannelName": channel_name,
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3Uri": s3_uri,
                "S3DataDistributionType": "FullyReplicated",
            }
        },
        "CompressionType": "None",
        "RecordWrapperType": "None",
    }


# Pipeline that chains three SageMaker components: train -> model -> deploy.
#
# Parameters (all overridable when the pipeline is run):
#   region:              passed as `region` to every step.
#   training_input_mode: passed to the training step.
#   train_image:         container image for the training step.
#   serving_image:       container image for the model step.
#   instance_type:       instance type for the training step.
#   channels:            JSON array (as a string) of training input channels;
#                        defaults to the train/validation/eval prefixes under
#                        s3://<bucket_name>/datasets/cifar10-dataset.
#
# The JSON arguments are produced with json.dumps rather than hand-written,
# backslash-continued string literals, so interpolated values are escaped
# correctly and the payload carries no stray indentation whitespace.
@dsl.pipeline(
    name='cifar10 pipeline',
    description='cifar10 pipeline using SageMaker operators'
)
def cifar10_classification(region='us-west-2',
                           training_input_mode='File',
                           train_image='763104351884.dkr.ecr.us-west-2.amazonaws.com/tensorflow-training:1.15.2-gpu-py36-cu100-ubuntu18.04',
                           serving_image='763104351884.dkr.ecr.us-west-2.amazonaws.com/tensorflow-inference:1.15.2-cpu',
                           instance_type='ml.p3.2xlarge',
                           channels=json.dumps([
                               _s3_prefix_channel(split, f's3://{bucket_name}/datasets/cifar10-dataset/{split}')
                               for split in ('train', 'validation', 'eval')
                           ])
                          ):

    # The same secret-mounting modifier is applied to every step.
    aws_credentials = use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY')

    # NOTE(review): "sagemaker_region" is hard-coded and does not follow the
    # `region` pipeline parameter — confirm whether that is intended.
    training_hyperparameters = {
        "batch-size": "256",
        "epochs": "10",
        "learning-rate": "0.01",
        "momentum": "0.9",
        "optimizer": "adam",
        "model-type": "resnet",
        "weight-decay": "0.0002",
        "sagemaker_program": "cifar10-training-sagemaker.py",
        "sagemaker_region": "us-west-2",
        "sagemaker_submit_directory": source_s3,
    }

    training = sagemaker_train_op(
        region=region,
        image=train_image,
        training_input_mode=training_input_mode,
        hyperparameters=json.dumps(training_hyperparameters),
        channels=channels,
        instance_type=instance_type,
        instance_count='1',
        volume_size='50',
        max_run_time='3600',
        model_artifact_path=f's3://{bucket_name}/jobs',
        network_isolation='False',
        traffic_encryption='False',
        spot_instance='False',
        role=role,
    ).apply(aws_credentials)

    # The model step reuses the training job name as the model name and
    # consumes the artifact URL produced by the training step.
    create_model = sagemaker_model_op(
        region=region,
        model_name=training.outputs['job_name'],
        image=serving_image,
        model_artifact_url=training.outputs['model_artifact_url'],
        role=role
    ).apply(aws_credentials)

    # The deploy step takes the model step's output as its first model name.
    sagemaker_deploy_op(
        region=region,
        model_name_1=create_model.output,
        instance_type_1='ml.m5.large'
    ).apply(aws_credentials)

In [22]:
# Compile the pipeline function into the package file submitted in the next cell.
pipeline_compiler = kfp.compiler.Compiler()
pipeline_compiler.compile(cifar10_classification, 'sm-cifar10-pipeline.zip')

In [23]:
# Connect to Kubeflow Pipelines and create the 'aws' experiment for this run.
client = kfp.Client()
aws_experiment = client.create_experiment(name='aws')

# Name the run with a UTC timestamp so that repeated submissions stay distinct.
run_timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
exp_name = f'cifar10-class-kfp-{run_timestamp}'

my_run = client.run_pipeline(
    experiment_id=aws_experiment.id,
    job_name=exp_name,
    pipeline_package_path='sm-cifar10-pipeline.zip',
)