# Model Deployment and Inference

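Deploy the trained model to a SageMaker real-time endpoint, then test it with a single prediction, a small batch comparison against the true labels, and a latency check.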

In [None]:
import sagemaker
from sagemaker.sklearn import SKLearnModel
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer
import pandas as pd
import numpy as np
import time

# SageMaker session created with default settings. It is not referenced again
# in the cells below; it may be kept for interactive use (TODO confirm).
sess = sagemaker.Session()

In [None]:
# Retrieve variables
# `%store -r <name>` is IPython's storemagic restore: it reloads a variable
# that was persisted earlier with `%store <name>` -- presumably by the
# preceding training/tuning notebooks (TODO confirm which one).
#   best_model_data: passed as `model_data` when the model object is created
#   test_s3_path:    read with pandas.read_csv in the inference test
#   role:            passed as `role` when the model object is created
%store -r best_model_data
%store -r test_s3_path
%store -r role

# Echo the restored values so a stale or unexpected artifact is easy to spot.
print(f'Model: {best_model_data}')
print(f'Test data: {test_s3_path}')

In [None]:
# Create model
# Describe the scikit-learn model to SageMaker: the inference script and
# container version first, then the restored model artifact and role.
# The endpoint itself is created by the later deploy() call.
sklearn_model = SKLearnModel(
    entry_point='../scripts/inference.py',
    framework_version='1.2-1',
    py_version='py3',
    model_data=best_model_data,
    role=role,
)

print('Model object created')

In [None]:
# Deploy endpoint
# The name is suffixed with the current Unix time in whole seconds, so each
# run of this cell asks for a differently named endpoint.
endpoint_name = f'demo-endpoint-{int(time.time())}'

predictor = sklearn_model.deploy(
    endpoint_name=endpoint_name,
    instance_type='ml.m5.large',
    initial_instance_count=1,
    # Requests are serialized as CSV; responses are parsed as JSON.
    serializer=CSVSerializer(),
    deserializer=JSONDeserializer(),
)

print(f'Endpoint deployed: {endpoint_name}')

In [None]:
# Test inference
# Load the held-out data and separate the label column from the features.
test_df = pd.read_csv(test_s3_path)
y_test = test_df['target']
X_test = test_df.drop(columns='target')

# Send the first feature row to the endpoint as a single request.
sample = X_test.iloc[0].values
result = predictor.predict(sample)

# The response is indexed by the 'prediction' and 'confidence' keys.
print(f'Prediction: {result["prediction"]}')
print(f'Confidence: {result["confidence"]:.4f}')
print(f'Actual: {y_test.iloc[0]}')

In [None]:
# Batch predictions
# Compare endpoint predictions with the true labels for the first few rows.
# The sample count is clamped to the number of rows available: indexing
# X_test.iloc[i] past the end raises IndexError, which a fixed count of 10
# would do on a test set with fewer than 10 rows.
n_samples = min(10, len(X_test))
predictions = []

# One endpoint request per row, in row order, so predictions[i] lines up
# with y_test.iloc[i] in the comparison table below.
for i in range(n_samples):
    sample = X_test.iloc[i].values
    result = predictor.predict(sample)
    predictions.append(result['prediction'])

comparison = pd.DataFrame({
    'Actual': y_test.iloc[:n_samples].values,
    'Predicted': predictions
})
# Row-wise match flag; its mean is the fraction of correct predictions.
comparison['Correct'] = comparison['Actual'] == comparison['Predicted']

print(comparison)
print(f'Accuracy: {comparison["Correct"].mean():.2%}')

In [None]:
# Latency test
# Time n_requests sequential predict() calls as seen from this notebook and
# report the mean and 95th-percentile latency in milliseconds.
latencies = []
n_requests = 50

for i in range(n_requests):
    # Wrap around so the loop still works when the test set has fewer rows
    # than n_requests.
    sample = X_test.iloc[i % len(X_test)].values
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short intervals; time.time() is wall-clock time and can jump
    # if the system clock is adjusted mid-measurement.
    start = time.perf_counter()
    predictor.predict(sample)
    latencies.append((time.perf_counter() - start) * 1000)  # seconds -> ms

print(f'Mean latency: {np.mean(latencies):.2f}ms')
print(f'P95 latency: {np.percentile(latencies, 95):.2f}ms')

In [None]:
# Cleanup (uncomment to delete)
# The two calls below are left commented out so that running every cell does
# not tear down the endpoint that was just deployed.
# predictor.delete_endpoint()
# predictor.delete_model()

# Print the equivalent CLI command for deleting the endpoint by name.
# NOTE(review): this command targets only the endpoint; the model (and,
# presumably, the endpoint configuration) are separate resources -- confirm
# they are cleaned up as well.
print(f'To delete: aws sagemaker delete-endpoint --endpoint-name {endpoint_name}')