In [1]:
!pip install boto3 sagemaker


Defaulting to user installation because normal site-packages is not writeable
Collecting sagemaker
  Downloading sagemaker-2.178.0.tar.gz (861 kB)
     |████████████████████████████████| 861 kB 1.6 MB/s            
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting cloudpickle==2.2.1
  Downloading cloudpickle-2.2.1-py3-none-any.whl (25 kB)
Collecting google-pasta
  Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)
     |████████████████████████████████| 57 kB 7.0 MB/s             
[?25hCollecting numpy<2.0,>=1.9.0
  Downloading numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
     |████████████████████████████████| 18.3 MB 3.4 MB/s            
[?25hCollecting protobuf<5.0,>=3.12
  Downloading protobuf-4.24.1-cp37-abi3-manylinux2014_x86_64.whl (311 kB)
     |████████████████████████████████| 311 kB 83.3 MB/s            
[?25hCollecting smdebug_rulesconfig==1.0.1
  Downloading smdebug_rulesconfig-1.0.1-py2.py3-none-any.whl (20 kB)
Coll

### Delete Role

In [None]:
import boto3
import json
from botocore.exceptions import ClientError

# Name of the IAM role to tear down
role_name = 'SageMaker-Role'

# Create an IAM client
iam_client = boto3.client('iam')

# IAM will not delete a role that still has managed policies attached or
# inline policies embedded, so both kinds are removed before delete_role.
# The listings are collected through paginators first, so long lists are not
# truncated and nothing is modified while a listing is still being read.
try:
    attached_policy_arns = [
        policy['PolicyArn']
        for page in iam_client.get_paginator('list_attached_role_policies').paginate(RoleName=role_name)
        for policy in page['AttachedPolicies']
    ]
    inline_policy_names = [
        policy_name
        for page in iam_client.get_paginator('list_role_policies').paginate(RoleName=role_name)
        for policy_name in page['PolicyNames']
    ]

    # Detach managed policies
    for policy_arn in attached_policy_arns:
        iam_client.detach_role_policy(RoleName=role_name, PolicyArn=policy_arn)
        print(f"Detached policy: {policy_arn}")

    # Delete inline policies
    for policy_name in inline_policy_names:
        iam_client.delete_role_policy(RoleName=role_name, PolicyName=policy_name)
        print(f"Deleted inline policy: {policy_name}")

    iam_client.delete_role(RoleName=role_name)
    print(f"Deleted role: {role_name}")
except ClientError as e:
    # A missing role is the expected outcome when this cleanup cell is re-run.
    if e.response['Error']['Code'] == 'NoSuchEntity':
        print(f"Role {role_name} does not exist; nothing to delete.")
    else:
        print("Error deleting SageMaker role:", e)

### Create Role

In [9]:
import boto3
import json
from botocore.exceptions import ClientError

# Specify the role name
role_name = 'SageMaker-Role'

# Managed policies for SageMaker
managed_policy_arns = [
    'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess',
    'arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess'  # Optional: If your data is in S3
]

# Create an IAM client
iam_client = boto3.client('iam')

# Trust policy that lets the SageMaker service assume the role
assume_role_policy_document = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Service": "sagemaker.amazonaws.com"
            },
            "Action": "sts:AssumeRole"
        }
    ]
}

try:
    # Create the role, or fall back to the existing one so that
    # create_role_response['Role']['Arn'] is always available to later cells.
    try:
        create_role_response = iam_client.create_role(
            RoleName=role_name,
            AssumeRolePolicyDocument=json.dumps(assume_role_policy_document)
        )
        print("SageMaker role created successfully:", create_role_response['Role']['Arn'])
    except ClientError as e:
        if e.response['Error']['Code'] != 'EntityAlreadyExists':
            raise  # reported by the outer handler below
        print("Role with the same name already exists.")
        # get_role returns the same {'Role': {'Arn': ...}} shape this cell relies on.
        create_role_response = iam_client.get_role(RoleName=role_name)
        print("Using existing SageMaker role:", create_role_response['Role']['Arn'])

    # Attach managed policies to the role. This also runs for a pre-existing
    # role so a re-run ends in the same state as a first run.
    # NOTE(review): relies on attach_role_policy accepting an already-attached policy — confirm.
    for policy_arn in managed_policy_arns:
        iam_client.attach_role_policy(
            RoleName=role_name,
            PolicyArn=policy_arn
        )
        print(f"Attached policy {policy_arn} to the role.")

except ClientError as e:
    print("Error creating SageMaker role:", e)


SageMaker role created successfully: arn:aws:iam::188775091215:role/SageMaker-Role
Attached policy arn:aws:iam::aws:policy/AmazonSageMakerFullAccess to the role.
Attached policy arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess to the role.


In [11]:
# Keep the ARN of the role for the model cells below. This needs
# create_role_response from the "Create Role" cell to exist in the current kernel session.
role_arn = create_role_response['Role']['Arn']

In [1]:
# Resolve the role ARN by name instead of pasting an account-specific ARN.
# This works in a fresh kernel (no dependency on the "Create Role" cell having
# run in this session) and in any AWS account that has the role.
import boto3

role_arn = boto3.client('iam').get_role(RoleName='SageMaker-Role')['Role']['Arn']

In [2]:
from sagemaker.pytorch import PyTorchModel
# Import packages
import boto3 # AWS Session Management
import sagemaker # SageMaker SDK

# Describe the model to SageMaker: the packaged artifact in S3, the script that
# serves it, the PyTorch/Python versions of the container, and the IAM role
# (role_arn, set in an earlier cell) passed as the model's role.
model = PyTorchModel(
    role=role_arn,
    model_data="s3://lightsketch-models-188775091215/models/model.tar.gz",
    entry_point='inference.py',
    framework_version='1.12',
    py_version='py38',
)

In [3]:
from sagemaker.deserializers import JSONDeserializer
from datetime import datetime

# Instance type used to host the endpoint
INSTANCE_TYPE = 'ml.c5.xlarge'
# Endpoint name: fixed prefix followed by the current UTC timestamp (microsecond resolution)
ENDPOINT_NAME = f"yolov8-pytorch-{datetime.utcnow():%Y-%m-%d-%H-%M-%S-%f}"

# Deploy the model defined above to a single instance; the predictor is
# configured with JSONDeserializer for responses.
predictor = model.deploy(
    endpoint_name=ENDPOINT_NAME,
    instance_type=INSTANCE_TYPE,
    initial_instance_count=1,
    deserializer=JSONDeserializer(),
)

-----------!

### Create SageMaker model with PyTorch inference container

In [None]:
from sagemaker.image_uris import retrieve

# Instance type the inference image is selected for
deploy_instance_type = 'ml.g4dn.xlarge'

# Look up the PyTorch inference container image URI for that instance type.
# NOTE(review): `region` is not defined in any visible cell — confirm where it is set.
pytorch_inference_image_uri = retrieve(
    framework='pytorch',
    region=region,
    version='1.7.1',
    py_version='py3',
    instance_type=deploy_instance_type,
    accelerator_type=None,
    image_scope='inference',
)
print(pytorch_inference_image_uri)

In [None]:
# NOTE(review): `time`, `sm_client`, `role` and `model_artifact` are not defined
# in the visible cells — confirm they are set before this cell runs.

# Inference image resolved in the previous cell
container = pytorch_inference_image_uri
# Model name made unique with the current Unix time in whole seconds
model_name = f"sagemaker-maskrcnn-{int(time.time())}"
print(container, model_name, sep='\n')

create_model_response = sm_client.create_model(
    ModelName=model_name,
    ExecutionRoleArn=role,
    PrimaryContainer={
        'Image': container,
        'ModelDataUrl': model_artifact,
        # TorchServe's default max request size is 6 MB; it is raised here to
        # support the ~70 MB input payload.
        'Environment': {
            'TS_MAX_REQUEST_SIZE': '100000000',
            'TS_MAX_RESPONSE_SIZE': '100000000',
            'TS_DEFAULT_RESPONSE_TIMEOUT': '1000',
        },
    },
)

### Real time hosted endpoint deployment and inference
Create an endpoint config name. Here we create one based on the date so that we can search endpoints based on creation time.

In [None]:
# strftime/gmtime are not imported by any visible cell; import them here so
# this cell runs after a kernel restart.
from time import gmtime, strftime

print(model_name)
# Config name carries a UTC timestamp so configs can be found by creation time.
endpoint_config_name = f"maskrcnnEndpointConfig-{strftime('%Y-%m-%d-%H-%M-%S', gmtime())}"
create_endpoint_config_response = sm_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[
        {
            "VariantName": "variant1",
            "ModelName": model_name,
            # Reuse the instance type the inference image was selected for
            # (deploy_instance_type, 'ml.g4dn.xlarge' in the image-URI cell)
            # so the image and the hosting instance cannot drift apart.
            "InstanceType": deploy_instance_type,
            "InitialInstanceCount": 1
        }
    ]
)
print(f"Created EndpointConfig: {create_endpoint_config_response['EndpointConfigArn']}")

: 