In [1]:
import os

import boto3
import sagemaker
from sagemaker.inputs import CreateModelInput
from sagemaker.model import Model
from sagemaker.session import Session
from sagemaker.sklearn.model import SKLearnModel
from sagemaker.workflow.model_step import ModelStep
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline_context import PipelineSession

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [2]:
# Region of the default boto3 session.
boto_session = boto3.Session()
region = boto_session.region_name

# Regular SageMaker session; used below to look up the default bucket.
sagemaker_session = Session()

# Execution role as reported by the SageMaker SDK.
role = sagemaker.get_execution_role()
default_bucket = sagemaker_session.default_bucket()

# Separate session object that is handed to the model in a later cell.
pipeline_session = PipelineSession()

In [3]:
# Make sure the folder that will receive the inference script (start_file.py)
# exists; an already existing folder is not an error.
os.makedirs(name="03_create_model", exist_ok=True)

In [4]:
%%writefile 03_create_model/start_file.py

from __future__ import print_function

import argparse
import joblib
import os
import pandas as pd

from sklearn.linear_model import LogisticRegression

# SageMaker provides no default function for loading the model,
# so the job fails unless model_fn is defined here.
def model_fn(model_dir):
    """Load the fitted model stored in ``model_dir`` and return it.

    The artifact is read from the file ``model.joblib`` inside ``model_dir``;
    that file name has to match the one used when the model was serialized.

    Args:
        model_dir: Directory that contains the serialized model file.

    Returns:
        The object deserialized by ``joblib.load``.
    """
    model_path = os.path.join(model_dir, "model.joblib")
    return joblib.load(model_path)

# The default prediction function returns the predicted class (0/1)
# rather than class probabilities, so we override it with a custom
# predict_fn that returns the probabilities instead.
def predict_fn(input_data, model):
    """Return the model's ``predict_proba`` output for ``input_data``.

    Args:
        input_data: Samples to score, passed to the model unchanged.
        model: Object returned by ``model_fn``; must expose ``predict_proba``.

    Returns:
        Whatever ``model.predict_proba(input_data)`` returns.
    """
    return model.predict_proba(input_data)

Overwriting 03_create_model/start_file.py


In [5]:
# Wrap the existing model artifact in an SKLearnModel whose inference handlers
# (model_fn / predict_fn) come from 03_create_model/start_file.py.
model_artifact_uri = "s3://sagemaker-eu-west-1-211125740051/pipelines-olx9nwd6ayq2-SimpleTrain-kFTu5RIag4/output/model.tar.gz"

sklearn_model = SKLearnModel(
    model_data=model_artifact_uri,
    entry_point="start_file.py",  # script that defines model_fn and predict_fn
    source_dir="03_create_model",  # folder containing the entry-point script
    role=role,
    framework_version="1.2-1",  # replace with the appropriate sklearn version
    sagemaker_session=pipeline_session,
)

In [6]:
# Build the create-model arguments first, then wrap them in a pipeline step.
create_model_args = sklearn_model.create(instance_type="ml.m5.large")
step_model_create = ModelStep(name="MyModelCreationStep", step_args=create_model_args)



In [8]:
# Assemble a pipeline that consists of the single model-creation step.
# `Pipeline` is imported together with the other SageMaker imports in the
# notebook's first cell, so every dependency is visible in one place.
pipeline_name = "03-simple-create-model"  # plain string: nothing to interpolate
pipeline = Pipeline(
    name=pipeline_name,
    steps=[step_model_create],
)

In [9]:
# Upsert the pipeline definition under the execution role. The response is
# left as the cell's last expression so it is still displayed.
upsert_response = pipeline.upsert(role_arn=role)
upsert_response



{'PipelineArn': 'arn:aws:sagemaker:eu-west-1:211125740051:pipeline/03-simple-create-model',
 'ResponseMetadata': {'RequestId': '164f020f-f1a1-406d-9fa2-32c5004cdfca',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '164f020f-f1a1-406d-9fa2-32c5004cdfca',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '90',
   'date': 'Thu, 11 Jul 2024 12:57:49 GMT'},
  'RetryAttempts': 0}}

In [10]:
# Start a run of `pipeline` and keep the object returned by start() in
# `execution` (presumably so the run can be inspected later; nothing visible
# in this notebook uses it yet).
execution = pipeline.start()