In [None]:
!pip install kfp boto3 --upgrade

# Restart the kernel
import os
os._exit(00)

Collecting kfp
  Downloading kfp-2.0.1.tar.gz (372 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m372.4/372.4 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting boto3
  Downloading boto3-1.28.11-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.8/135.8 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m00:01[0m
Collecting kfp-pipeline-spec==0.2.2
  Downloading kfp_pipeline_spec-0.2.2-py3-none-any.whl (20 kB)
Collecting kfp-server-api<2.1.0,>=2.0.0
  Downloading kfp-server-api-2.0.0.tar.gz (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.4/63.4 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting botocore<1.32.0,>=1.31.11
  Downloading botocore-1.31.11-py3-none-any.whl (11.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.0/11.0 MB[0m [31

In [1]:
import boto3

# Ask STS which identity the current credentials belong to; the reply
# carries the AWS account ID under the 'Account' key.
sts_client = boto3.client('sts')
aws_account_id = sts_client.get_caller_identity()['Account']

print(f"AWS Account ID: {aws_account_id}")

AWS Account ID: 404886641986


In [75]:
from kfp import dsl

@dsl.component(base_image="python:3.11", packages_to_install=['numpy==1.25.1', 'pandas', 'gzip-utils==2020.7.15', 'jsons==1.6.3', 'urllib3==1.26.15', 'boto3', 'sagemaker'])
def load_dataset(aws_account_id: str) -> str:
    """Stage the MNIST sample dataset in S3 for the SageMaker KMeans example.

    Downloads ``mnist.pkl.gz`` from the ``sagemaker-sample-files`` bucket and
    uploads three objects to the bucket ``ack-sagemaker-bucket-<aws_account_id>``:

    * ``mnist_kmeans_example/train_data`` - the training split, serialized
      with ``write_numpy_to_dense_tensor``.
    * ``mnist_kmeans_example/test_data`` - the test split, serialized the
      same way.
    * ``mnist_kmeans_example/input/valid_data.csv`` - the validation features
      written as comma-separated text.

    Args:
        aws_account_id: AWS account ID, used as the suffix of the bucket name.

    Returns:
        The string ``"done"`` after all uploads have been issued.
    """
    import gzip
    import io

    import boto3
    import numpy
    import pandas
    from sagemaker.amazon.common import write_numpy_to_dense_tensor

    ###################################################################
    # This is the only thing that you need to change to run this code:
    # the name of your S3 bucket.
    bucket = f"ack-sagemaker-bucket-{aws_account_id}"

    print("bucket name:", bucket)

    # If you are going to use the default values of the pipeline, use a
    # bucket that lives in the us-east-1 region.
    ###################################################################

    # Load the dataset
    s3_client = boto3.client("s3")
    s3_client.download_file("sagemaker-sample-files", "datasets/image/MNIST/mnist.pkl.gz", "mnist.pkl.gz")

    # NOTE: read_pickle unpickles the file, which can execute arbitrary code.
    # Only point the download above at a bucket/object you trust.
    with gzip.open("mnist.pkl.gz", "rb") as f:
        train_set, valid_set, test_set = pandas.read_pickle(f)

    # Upload dataset to S3
    train_data_key = 'mnist_kmeans_example/train_data'
    test_data_key = 'mnist_kmeans_example/test_data'
    train_data_location = 's3://{}/{}'.format(bucket, train_data_key)
    test_data_location = 's3://{}/{}'.format(bucket, test_data_key)
    print('training data will be uploaded to: {}'.format(train_data_location))
    print('test data will be uploaded to: {}'.format(test_data_location))

    # Convert each split into the format required by the SageMaker KMeans
    # algorithm and upload it. Every split gets its own fresh buffer: a buffer
    # that has already been written and uploaded still holds the previous
    # split's bytes (or may have been closed), so it must not be reused.
    target_bucket = boto3.resource('s3').Bucket(bucket)
    for data_key, split in ((train_data_key, train_set), (test_data_key, test_set)):
        buf = io.BytesIO()
        write_numpy_to_dense_tensor(buf, split[0], split[1])
        buf.seek(0)  # rewind so the upload reads from the first byte
        target_bucket.Object(data_key).upload_fileobj(buf)

    # Write the validation features as CSV and upload them as the input file.
    numpy.savetxt('valid-data.csv', valid_set[0], delimiter=',', fmt='%g')
    input_key = "{}/valid_data.csv".format("mnist_kmeans_example/input")
    s3_client.upload_file('valid-data.csv', bucket, input_key)

    return "done"

In [76]:
@dsl.pipeline
def sample_sagemaker_pipeline():
    # Single-step pipeline: run the dataset loader and expose its output.
    # aws_account_id is not a pipeline parameter; it is taken from the
    # notebook namespace at the time this function is executed.
    dataset_task = load_dataset(aws_account_id=aws_account_id)
    return dataset_task.output

In [77]:
from kfp import compiler

# Compile the pipeline function into a local YAML package.
compiler.Compiler().compile(
    pipeline_func=sample_sagemaker_pipeline,
    package_path='sample-sagemaker-pipeline.yaml',
)

# Upload pipeline to S3

In [78]:
# Where the compiled pipeline package is stored in S3.
bucket_pipeline = "ack-kubeflow-bucket-{}".format(aws_account_id)
pipeline_key = 'notebooks/sample-sagemaker-pipeline.yaml'
pipeline_location = f"s3://{bucket_pipeline}/{pipeline_key}"

# Copy the locally compiled YAML to that location.
s3_client = boto3.client("s3")
s3_client.upload_file(
    Filename='sample-sagemaker-pipeline.yaml',
    Bucket=bucket_pipeline,
    Key=pipeline_key,
)
print(f"pipeline is uploaded to: {pipeline_location}")

pipeline is uploaded to: s3://ack-kubeflow-bucket-404886641986/notebooks/sample-sagemaker-pipeline.yaml


# Run Pipeline

In [14]:
import os

from kfp.client import Client

# Spell out the scheme so the endpoint is unambiguous.
KFP_HOST = 'https://kubeflow.platform.whatautomatech.com/pipelines'

# Optional session cookie string ("<cookie-name>=<value>") for deployments that
# sit behind a login page. When KFP_AUTH_COOKIES is unset this evaluates to
# None, i.e. no cookies are passed, exactly as before.
# NOTE(review): the run submission recorded in this notebook was rejected with
# HTTP 405 and the response carried Cognito/CloudFront headers, which suggests
# the unauthenticated request ended up at the login page - confirm, and export
# KFP_AUTH_COOKIES with a valid session cookie before running.
# NOTE(review): the Kubeflow docs use a "/pipeline" path suffix for the API
# host; verify that "/pipelines" is correct for this deployment.
client = Client(host=KFP_HOST, cookies=os.environ.get('KFP_AUTH_COOKIES'))



In [15]:
# Submit the locally compiled package as a one-off run, passing an empty
# mapping of pipeline arguments.
client.create_run_from_pipeline_package(
    pipeline_file='sample-sagemaker-pipeline.yaml',
    arguments=dict(),
)

ApiException: (405)
Reason: Method Not Allowed
HTTP response headers: HTTPHeaderDict({'Content-Length': '0', 'Connection': 'keep-alive', 'Date': 'Wed, 26 Jul 2023 02:37:44 GMT', 'Set-Cookie': 'XSRF-TOKEN=6ac8b374-4a87-425b-a81a-d298b6f9f4f2; Path=/; Secure; HttpOnly; SameSite=Lax', 'x-amz-cognito-request-id': '0befca52-d819-4d28-b1ea-2f6a4543f278', 'Allow': 'GET', 'X-Content-Type-Options': 'nosniff', 'X-XSS-Protection': '1; mode=block', 'Cache-Control': 'no-cache, no-store, max-age=0, must-revalidate', 'Pragma': 'no-cache', 'Expires': '0', 'Strict-Transport-Security': 'max-age=31536000 ; includeSubDomains', 'X-Frame-Options': 'DENY', 'Server': 'Server', 'X-Cache': 'Error from cloudfront', 'Via': '1.1 122cd39a473c6e4835362753fc929a08.cloudfront.net (CloudFront)', 'X-Amz-Cf-Pop': 'IAD89-P2', 'X-Amz-Cf-Id': 'lSv4VGMnSaJIDWzmqHwmezId_04GHlHirFX2VkN0TF5-aI_riGgcRg=='})
