In [None]:
import boto3
import sagemaker

# Region of the default boto3 session; handed to get_pipeline() further down.
region = boto3.Session().region_name
# Execution role resolved through the SageMaker SDK; used to build and upsert the pipeline.
role = sagemaker.get_execution_role()

# Change this to reflect your project/business name.
# Plain string literal: there is nothing to interpolate, so no f-prefix is needed.
pipeline_name = "GLAIR-BCAF-Consultation-BatchTransform-Toyota"

In [None]:
from pipeline import get_pipeline

# Build the pipeline object via the local `pipeline` module, using the region,
# role and pipeline name configured earlier in the notebook.
pipeline = get_pipeline(region=region, role=role, pipeline_name=pipeline_name)

In [None]:
# Push the pipeline definition using the execution role.
# NOTE(review): presumably `pipeline` is a SageMaker Pipeline, whose upsert creates
# the pipeline or updates it if it already exists -- confirm against get_pipeline().
pipeline.upsert(role_arn=role)

In [None]:
# S3 client pinned to ap-southeast-1; the region is hard-coded here rather than
# taken from the boto3 session that provides `region`.
s3_singapore = boto3.client("s3", region_name="ap-southeast-1")

def get_latest_file(bucket_name, prefix_name, s3_client=None):
    """Return the S3 URI of the most recently modified ``.csv`` object under a prefix.

    Args:
        bucket_name: Name of the S3 bucket to search.
        prefix_name: Key prefix that limits the search.
        s3_client: Optional S3 client to query. When omitted, the notebook-level
            ``s3_singapore`` client is used.

    Returns:
        A string of the form ``s3://<bucket_name>/<key>`` for the ``.csv``
        object with the newest ``LastModified`` value.

    Raises:
        IndexError: If no ``.csv`` object exists under the prefix. The exception
            type is the one the lookup has always raised for an empty result;
            the message now names the searched location.
    """
    client = s3_singapore if s3_client is None else s3_client

    # list_objects_v2 returns at most 1,000 keys per call, so walk every page;
    # otherwise the newest file can be missed on a large prefix.
    paginator = client.get_paginator("list_objects_v2")
    csv_objects = [
        obj
        for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix_name)
        for obj in page.get("Contents", [])  # "Contents" is absent on empty pages
        if obj["Key"].endswith(".csv")
    ]

    if not csv_objects:
        raise IndexError(
            f"No .csv objects found under s3://{bucket_name}/{prefix_name}"
        )

    # max() picks the newest object without sorting the whole listing.
    latest_csv_key = max(csv_objects, key=lambda obj: obj["LastModified"])["Key"]

    return f"s3://{bucket_name}/{latest_csv_key}"

# Both inputs live in the same bucket; name it once so the two lookups
# cannot silently drift apart.
INPUT_BUCKET = "glair-exploration-sagemaker-s3-bucket-singapore"

# Newest CSV under the batch-transform prefix (the "lelang" input).
s3_uri_lelang = get_latest_file(
    INPUT_BUCKET,
    "glair-bcaf-consultation-input/batch-transform/toyota",
)

# Newest CSV under the training/crawling prefix (the "crawling" input).
s3_uri_crawling = get_latest_file(
    INPUT_BUCKET,
    "glair-bcaf-consultation-input/training/toyota/crawling",
)

In [None]:
# Show which lelang input file was resolved by get_latest_file().
print(f"The latest file for lelang data is located at '{s3_uri_lelang}'")

In [None]:
# Show which crawling input file was resolved by get_latest_file().
print(f"The latest file for crawling data is located at '{s3_uri_crawling}'")

In [None]:
# SageMaker client pinned to us-east-1, independent of the session region stored in `region`.
# NOTE(review): the model name looked up through this client is later passed to the
# pipeline as ModelName -- confirm the pipeline runs in the same region.
sagemaker_virginia = boto3.client('sagemaker', region_name="us-east-1")
    
def get_latest_model(name_contains="Toyota", sagemaker_client=None):
    """Look up the most recently created SageMaker model matching a name filter.

    Args:
        name_contains: Substring the model name must contain. Defaults to
            ``"Toyota"``.
        sagemaker_client: Optional SageMaker client to query. When omitted, the
            notebook-level ``sagemaker_virginia`` client is used.

    Returns:
        A ``(model_data_url, model_name)`` tuple, where ``model_data_url`` is
        the ``ModelDataUrl`` of the model's ``PrimaryContainer``.

    Raises:
        IndexError: If no model matches ``name_contains``. The exception type is
            the one the lookup has always raised for an empty result; the
            message now names the filter.
        KeyError: If the model description has no ``PrimaryContainer`` or no
            ``ModelDataUrl`` entry.
    """
    client = sagemaker_virginia if sagemaker_client is None else sagemaker_client

    # Sorted newest-first, so the first entry is the latest model; only that
    # entry is used, hence MaxResults=1.
    model_response = client.list_models(
        SortBy="CreationTime",
        SortOrder="Descending",
        NameContains=name_contains,
        MaxResults=1,
    )

    models = model_response.get("Models", [])
    if not models:
        raise IndexError(
            f"No SageMaker model whose name contains '{name_contains}' was found"
        )
    latest_model_name = models[0]["ModelName"]

    model_description = client.describe_model(ModelName=latest_model_name)

    return model_description["PrimaryContainer"]["ModelDataUrl"], latest_model_name

# get_latest_model() returns (model data URL, model name); only the name is used later.
_latest_model_data_url, model_name = get_latest_model()

In [None]:
# Show which model name was resolved by get_latest_model().
print(f"The latest file for model data is named '{model_name}'")

In [None]:
# Start a pipeline execution. Instance settings are fixed here (counts are passed
# as strings); the two input URIs and the model name come from the lookups above.
execution = pipeline.start(
    parameters={
        "ProcessingInstanceType": "ml.m5.large",
        "ProcessingInstanceCount": "1",
        "TrainingInstanceType": "ml.m5.large",
        "TransformInstanceType": "ml.m5.large",
        "TransformInstanceCount": "1",
        "InputDataLelangURI": s3_uri_lelang,
        "InputDataCrawlingURI": s3_uri_crawling,
        "ModelName": model_name,
    }
)

In [None]:
# Last expression in the cell, so the returned description of the started
# execution is rendered as the cell output.
execution.describe()