In [5]:
import os
import boto3

# Pin the AWS profile that boto3 resolves credentials from.
os.environ['AWS_PROFILE'] = "default"

# Create the session and fail fast when the profile yields no credentials:
# Session.get_credentials() returns None in that case, and the first API call
# below would otherwise fail with a much less helpful error.
# (Never print the access/secret key here - notebook outputs get shared.)
boto_sess = boto3.Session()
credentials = boto_sess.get_credentials()
if credentials is None:
    raise RuntimeError(
        f"No AWS credentials found for profile {os.environ['AWS_PROFILE']!r}; "
        "configure the profile before running this notebook."
    )

# Create the clients from the session that was just verified
sm_client = boto_sess.client("sagemaker")
iam_client = boto_sess.client("iam")

# Replace with your actual role name
role_name = "SageMaker-smaker_cli"

# Get role details
response = iam_client.get_role(RoleName=role_name)

# Extract the role ARN (used by later cells as `role`)
role = response["Role"]["Arn"]
print("Role ARN:", role)


# List SageMaker endpoints to verify connection
response = sm_client.list_endpoints()
print(response)

Role ARN: arn:aws:iam::673671551738:role/service-role/SageMaker-smaker_cli
{'Endpoints': [], 'ResponseMetadata': {'RequestId': '610613b5-8c58-47be-9003-40fbda7799dc', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '610613b5-8c58-47be-9003-40fbda7799dc', 'content-type': 'application/x-amz-json-1.1', 'content-length': '16', 'date': 'Mon, 02 Jun 2025 04:27:48 GMT'}, 'RetryAttempts': 0}}


In [6]:
# Use Sagemaker SDK to create a session
import sagemaker

sess = sagemaker.Session()
# Read the region through the public `boto_region_name` property instead of
# the private `_region_name` attribute, which is not part of the SDK's API.
region = sess.boto_region_name

print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/xdg-ubuntu/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/nachiketa/.config/sagemaker/config.yaml
sagemaker role arn: arn:aws:iam::673671551738:role/service-role/SageMaker-smaker_cli
sagemaker bucket: sagemaker-us-east-1-673671551738
sagemaker session region: us-east-1


In [7]:
# Show the SageMaker default bucket for reference only. It gets its own names
# so that `bucket` / `s3_location` are bound exactly once in this cell and
# always refer to the bucket that is actually used for the upload.
default_bucket = sess.default_bucket()
print(default_bucket)
default_s3_location = f"s3://{default_bucket}/djl-serving/"
print(f"Sagemaker default bucket : {default_s3_location}")

# Instead of default bucket, we use our own custom bucket
bucket = "deepseek-math-repo"
s3_location = f"s3://{bucket}/djl-serving/"
print(f"Sagemaker custom bucket : {s3_location}")

sagemaker-us-east-1-673671551738
Sagemaker default bucket : s3://sagemaker-us-east-1-673671551738/djl-serving/
Sagemaker custom bucket : s3://deepseek-math-repo/djl-serving/


In [8]:
# Get the uri of the DJL-Deepspeed image
from sagemaker import image_uris

# Resolve the container image for the "djl-deepspeed" framework, pinned to
# version 0.21.0, in the session's region.
img_uri = image_uris.retrieve(
    region=region,
    framework="djl-deepspeed",
    version="0.21.0",
)
print(img_uri)

763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.21.0-deepspeed0.8.3-cu117


Create the inference script (`model.py`) that loads the model from the S3 bucket.

In [17]:
%%writefile model.py

import os
from typing import Optional

import deepspeed
import torch
from djl_python import Input, Output
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

# Module-level cache for the text-generation pipeline. handle() populates it
# on its first call so that later requests reuse the already-loaded model.
predictor = None


def get_model(properties):
    """Build a DeepSpeed-accelerated text-generation pipeline.

    Args:
        properties: Dict of handler properties. Must contain
            ``tensor_parallel_degree`` (an int or a numeric string). An
            optional ``model_id`` entry overrides the default model location.

    Returns:
        A ``transformers`` text-generation pipeline wrapping the model returned
        by ``deepspeed.init_inference``, with ``device`` set to this worker's
        ``LOCAL_RANK``.

    Raises:
        KeyError: If ``tensor_parallel_degree`` is missing from ``properties``.
        ValueError: If ``tensor_parallel_degree`` cannot be converted to int.
    """
    # NOTE(review): `from_pretrained` expects a Hugging Face hub id or a local
    # directory. Confirm the "s3://" default below can be resolved inside the
    # serving container; otherwise supply a usable location via `model_id`.
    model_name = properties.get("model_id", "s3://deepseek-math-7b/")
    # Property values may be delivered as strings (e.g. "2"); normalise to int
    # before handing the value to DeepSpeed as the model-parallel size.
    tensor_parallel = int(properties["tensor_parallel_degree"])
    local_rank = int(os.getenv("LOCAL_RANK", "0"))
    # NOTE(review): revision="float32" must exist as a branch/tag of the model
    # repository when loading from the hub - confirm for this model.
    model = AutoModelForCausalLM.from_pretrained(
        model_name, revision="float32", torch_dtype=torch.float32
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Let DeepSpeed shard the model and inject its optimised inference kernels,
    # keeping the dtype the model was loaded with.
    model = deepspeed.init_inference(
        model,
        mp_size=tensor_parallel,
        dtype=model.dtype,
        replace_method="auto",
        replace_with_kernel_inject=True,
    )
    generator = pipeline(
        task="text-generation", model=model, tokenizer=tokenizer, device=local_rank
    )
    return generator


def handle(inputs: Input) -> Optional[Output]:
    """Entry point invoked for each request: run text generation on the input.

    The pipeline is built lazily on the first call (including the warm-up
    call) and cached in the module-level ``predictor``.

    Args:
        inputs: Request wrapper; its properties are used to build the model
            and its payload is read as a string prompt.

    Returns:
        ``None`` for an empty (warm-up) request, otherwise an ``Output``
        carrying the pipeline's generation result.
    """
    global predictor
    # Compare against None explicitly so the cache check does not depend on
    # the truthiness of the pipeline object.
    if predictor is None:
        predictor = get_model(inputs.get_properties())

    if inputs.is_empty():
        # Model server makes an empty call to warmup the model on startup
        return None

    data = inputs.get_as_string()
    result = predictor(data, do_sample=True, max_new_tokens=256)
    return Output().add(result)

Writing model.py


### Serving properties

In [18]:
%%writefile serving.properties
# Engine selected for serving this model.
engine = DeepSpeed
# Tensor-parallel degree. model.py reads `tensor_parallel_degree` from the
# handler properties and passes it to deepspeed.init_inference as mp_size;
# presumably that value is sourced from this option - confirm.
option.tensor_parallel_degree = 2

Writing serving.properties


Compress the model script (`model.py`) and `serving.properties` into a tarball.

In [19]:
%%sh
# Abort on the first failing command (or unset variable) so that a missing
# model.py / serving.properties can never result in an empty archive that is
# then uploaded by the next cell.
set -eu

# Always start fresh; -f makes this a no-op when the directory is absent.
rm -rf dsk-r

mkdir -p dsk-r
# Copy rather than move so this cell can be re-run without first re-running
# the %%writefile cells that generate the two files.
cp model.py serving.properties dsk-r/
tar -czvf dsk-r.tar.gz dsk-r/
# The archive is uploaded to S3 from Python (S3Uploader) in the next cell.

dsk-r/
dsk-r/serving.properties
dsk-r/model.py


In [20]:
# Push the packaged archive to the chosen S3 prefix; the call returns the
# S3 URI of the uploaded object.
model_tar_url = sagemaker.s3.S3Uploader.upload(
    local_path="dsk-r.tar.gz",
    desired_s3_uri=s3_location,
)