In [11]:
import os
import boto3
import json
from dotenv import load_dotenv

## 배포 상태 확인

In [4]:
load_dotenv()

True

In [7]:
sagemaker_client = boto3.client('sagemaker',
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY"),
    aws_secret_access_key=os.getenv("AWS_SECRET_KEY"),
    region_name=os.getenv("AWS_REGION")
)
sagemaker_client 

<botocore.client.SageMaker at 0x174a060b5d0>

In [8]:
# 엔드포인트 상태 확인
def check_endpoint_status(endpoint_name):
    response = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
    status = response['EndpointStatus']
    print(f"Endpoint status: {status}")
    return status

# 엔드포인트 설명 보기
def get_endpoint_description(endpoint_name):
    response = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
    return response

In [9]:
check_endpoint_status("twin")

Endpoint status: InService


'InService'

In [10]:
get_endpoint_description("twin")

{'EndpointName': 'twin',
 'EndpointArn': 'arn:aws:sagemaker:ap-northeast-2:442042511020:endpoint/twin',
 'EndpointConfigName': 'twin',
 'ProductionVariants': [{'VariantName': 'AllTraffic',
   'DeployedImages': [{'SpecifiedImage': '763104351884.dkr.ecr.ap-northeast-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.3.0-tgi2.2.0-gpu-py310-cu121-ubuntu22.04-v2.0',
     'ResolvedImage': '763104351884.dkr.ecr.ap-northeast-2.amazonaws.com/huggingface-pytorch-tgi-inference@sha256:4becc345f722896a9cd15979df0dd98c1eeaf9900393b8a72e636e6bb870ca68',
     'ResolutionTime': datetime.datetime(2025, 2, 11, 15, 39, 13, 641000, tzinfo=tzlocal())}],
   'CurrentWeight': 1.0,
   'DesiredWeight': 1.0,
   'CurrentInstanceCount': 1,
   'DesiredInstanceCount': 1}],
 'EndpointStatus': 'InService',
 'CreationTime': datetime.datetime(2025, 2, 11, 15, 39, 13, 141000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2025, 2, 11, 15, 45, 25, 313000, tzinfo=tzlocal()),
 'ResponseMetadata': {'RequestId': 'd

## 배포 모델 사용

In [12]:
def query_endpoint(endpoint_name, input_text):
    # SageMaker 런타임 클라이언트 생성
    runtime_client = boto3.client(
        'sagemaker-runtime',
        aws_access_key_id=os.getenv("AWS_ACCESS_KEY"),
        aws_secret_access_key=os.getenv("AWS_SECRET_KEY"),
        region_name=os.getenv("AWS_REGION"),
    )
    
    # 입력 데이터 준비
    payload = {
        "inputs": input_text
    }
    
    # 엔드포인트 호출
    response = runtime_client.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType='application/json',
        Body=json.dumps(payload)
    )
    
    # 응답 처리
    result = json.loads(response['Body'].read().decode())
    return result

In [13]:
result = query_endpoint("twin", "what can you do for me?")
result

[{'generated_text': 'what can you do for me? asked A.\nhow can i do for you? asked B.\nA.) Can you speak Hindi?\nB.) Can you speak Sanskrit?'}]

In [15]:
result[0]["generated_text"]

'what can you do for me? asked A.\nhow can i do for you? asked B.\nA.) Can you speak Hindi?\nB.) Can you speak Sanskrit?'

## 모델 성능 모니터링

In [16]:
import datetime

def get_endpoint_metrics(endpoint_name):
    cloudwatch = boto3.client(
        'cloudwatch',
        aws_access_key_id=os.getenv("AWS_ACCESS_KEY"),
        aws_secret_access_key=os.getenv("AWS_SECRET_KEY"),
        region_name=os.getenv("AWS_REGION"),
    )
    
    # 주요 메트릭 조회
    metrics = cloudwatch.get_metric_data(
        MetricDataQueries=[
            {
                'Id': 'invocations',
                'MetricStat': {
                    'Metric': {
                        'Namespace': 'AWS/SageMaker',
                        'MetricName': 'Invocations',
                        'Dimensions': [
                            {'Name': 'EndpointName', 'Value': endpoint_name}
                        ]
                    },
                    'Period': 300,
                    'Stat': 'Sum'
                }
            },
            {
                'Id': 'latency',
                'MetricStat': {
                    'Metric': {
                        'Namespace': 'AWS/SageMaker',
                        'MetricName': 'ModelLatency',
                        'Dimensions': [
                            {'Name': 'EndpointName', 'Value': endpoint_name}
                        ]
                    },
                    'Period': 300,
                    'Stat': 'Average'
                }
            }
        ],
        StartTime=datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=1),
        EndTime=datetime.datetime.now(datetime.timezone.utc)
    )
    
    return metrics

In [17]:
metrics = get_endpoint_metrics("twin")
metrics

{'MetricDataResults': [{'Id': 'invocations',
   'Label': 'Invocations',
   'Timestamps': [],
   'Values': [],
   'StatusCode': 'Complete'},
  {'Id': 'latency',
   'Label': 'ModelLatency',
   'Timestamps': [],
   'Values': [],
   'StatusCode': 'Complete'}],
 'Messages': [],
 'ResponseMetadata': {'RequestId': '9cad89c2-15de-410e-8d48-8919b904fbc8',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '9cad89c2-15de-410e-8d48-8919b904fbc8',
   'content-type': 'text/xml',
   'content-length': '677',
   'date': 'Tue, 11 Feb 2025 07:01:56 GMT'},
  'RetryAttempts': 0}}