In [1]:
# Set the base project name
project_name = 'sagemaker-payment'

In [2]:
import mlrun

# Initialize the MLRun project object
project = mlrun.get_or_create_project(project_name, context="./")

> 2024-01-23 16:45:54,828 [info] Project loaded successfully: {'project_name': 'sagemaker-payment'}


In [3]:
mlrun.set_env_from_file("env.var")

In [4]:
import os

# getting our model training function
project.set_function('./src/train.py', requirements=['sagemaker'],
                     name='train', kind='job', image='mlrun/mlrun')

<mlrun.runtimes.kubejob.KubejobRuntime at 0x7fc39032cfd0>

In [5]:
evaluate_function = project.set_function(
    "src/evaluate.py",
    "evaluate",
    handler="evaluate",
    image="mlrun/mlrun",
    requirements=["xgboost"],
)

In [14]:
%%writefile src/workflow.py
import mlrun
from kfp import dsl
from mlrun.model import HyperParamOptions
from mlrun import get_or_create_ctx

from mlrun import (
    build_function,
    deploy_function,
    import_function,
    run_function,
)

    
@dsl.pipeline(
    name="Fraud Detection Pipeline",
    description="Detecting fraud from a transactions dataset"
)

def kfpipeline(evaluate_path):    

    project = mlrun.get_current_project()  

    project.get_function('train',sync=True)    
    

    train = project.run_function(name='train',
                               function='train',
                               handler='train',
                               params={},
                               outputs=["model"])
    
    ctx = get_or_create_ctx("kfp")
    ctx.logger.info(train.outputs)
    
    
    # evaluating the model
    evaluate_function = project.get_function("evaluate")
    
    #running
    evaluate_run = project.run_function(
    handler="evaluate",
    params={
        "model_path": train.outputs['model_path'],
        "model_name": "xgboost-model",
        "test_set": evaluate_path,
        "label_column": "transaction_category",        
    },
    returns=["classification_report: dataset"])
    
    
                                          
    # deploying serving function
    serving_function = project.get_function("serving")    
    

    if serving_function.spec.graph is None:

        # Set the topology and get the graph object:
        graph = serving_function.set_topology("flow", engine="async")

        # Add the steps:
        graph.to("XGBModelServer",
                 name="xgboost-model",
                 model_path=train.outputs['model_path']) \
             .to(handler="postprocess", name="postprocess").respond()


    # Set the desired requirements:
 
    # Deploy the serving function:
    project.deploy_function("serving")


Overwriting src/workflow.py


In [15]:
workflow_name = "workflow"
project.set_workflow(name=workflow_name, workflow_path="src/workflow.py")

In [16]:
import sagemaker
import boto3

sagemaker_role = os.environ["SAGEMAKER-ROLE"]

region = sagemaker.Session().boto_region_name
sm_client = boto3.client("sagemaker")
boto_session = boto3.Session(region_name=region)
sagemaker_session = sagemaker.session.Session(boto_session=boto_session, sagemaker_client=sm_client)
#role = sagemaker.get_execution_role()
role = sagemaker_role
bucket_prefix = "payment-classification"
s3_bucket = sagemaker_session.default_bucket()

INFO:botocore.credentials:Found credentials in environment variables.


In [17]:
s3_data = "s3://{}/{}/test/test.csv".format(s3_bucket, bucket_prefix)

In [18]:
s3_data

's3://sagemaker-us-east-1-934638699319/payment-classification/test/test.csv'

In [19]:
project.run(workflow_name,         
            watch=True,local = True,arguments= {"evaluate_path": s3_data})

> 2024-01-23 16:49:17,203 [info] Storing function: {'name': 'train', 'uid': 'e021fe04425642f1b92fccb7bdd28f96', 'db': None}
> 2024-01-23 16:49:17,772 [info] arn:aws:iam::934638699319:role/service-role/AmazonSageMaker-ExecutionRole-20231206T163342


KeyboardInterrupt: 

In [20]:
project.run(workflow_name,         
            watch=True,arguments= {"evaluate_path": s3_data})

> 2024-01-23 16:49:30,387 [info] Storing function: {'name': 'train', 'uid': '6ccb7080a6094b0ba114c948ba7c411e', 'db': 'http://mlrun-api:8080'}
> 2024-01-23 16:49:30,634 [error] error submitting task: 400 Client Error: Bad Request for url: http://mlrun-api:8080/api/v1/submit_job: details: MLRunInvalidArgumentError('This runtime kind (job) must have a valid image'), caused by: 400 Client Error: Bad Request for url: http://mlrun-api:8080/api/v1/submit_job
> 2024-01-23 16:49:30,635 [error] Failed remote run: {'error': "400 Client Error: Bad Request for url: http://mlrun-api:8080/api/v1/submit_job: details: MLRunInvalidArgumentError('This runtime kind (job) must have a valid image'), caused by: 400 Client Error: Bad Request for url: http://mlrun-api:8080/api/v1/submit_job"}
> 2024-01-23 16:49:30,635 [error] Workflow run failed: {'exc_info': ['Traceback (most recent call last):\n', '  File "/User/.pythonlibs/mlrun-base/lib/python3.9/site-packages/mlrun/errors.py", line 93, in raise_for_statu

d562f8519a504e1d9aa2768e589d76e5