In [43]:
import sagemaker
from sagemaker.sklearn.estimator import SKLearn
from constants import AWS_ROLE
from sagemaker.sklearn.model import SKLearnModel
import json
from sagemaker import ModelMetrics, MetricsSource
import boto3

In [29]:
# Hyperparameters handed to the SKLearn estimator; every value is a string.
hyperparameters = {
    'n-estimators': '100',
    'max-depth': '10',
}

# Low-level S3 client, used further down to upload the metrics JSON.
s3_client = boto3.client(service_name='s3')

In [7]:
# IAM role comes from the project's constants module.
role = AWS_ROLE

# Default SageMaker session, used later to describe the training job.
sagemaker_session = sagemaker.Session()

In [16]:
# Metric extracted from the training logs via regex.
# NOTE(review): the capture group `(.*)` is greedy and takes the rest of the
# matched line - confirm model_train.py prints nothing after the number.
metric_definitions = [{'Name': 'accuracy', 'Regex': 'accuracy=(.*)'}]

# Estimator configuration: training script, compute, framework and outputs.
sklearn_estimator = SKLearn(
    entry_point='model_train.py',
    framework_version='0.23-1',
    py_version='py3',
    role=role,
    instance_type='ml.m5.large',
    instance_count=1,
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions,
    output_path='s3://mlops-model-artifact-bucket/',
)

# Start the training job, feeding the Iris CSV through the 'train' channel.
sklearn_estimator.fit(inputs={'train': 's3://mlops-bucket-files/iris.csv'})

INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2024-10-24-16-57-28-300


2024-10-24 16:57:31 Starting - Starting the training job...
2024-10-24 16:57:47 Starting - Preparing the instances for training...
2024-10-24 16:58:14 Downloading - Downloading input data...
2024-10-24 16:58:54 Downloading - Downloading the training image...
2024-10-24 16:59:30 Training - Training image download completed. Training in progress..2024-10-24 16:59:35,312 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
2024-10-24 16:59:35,315 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2024-10-24 16:59:35,353 sagemaker_sklearn_container.training INFO     Invoking user training script.
2024-10-24 16:59:35,527 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2024-10-24 16:59:35,538 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2024-10-24 16:59:35,551 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2024-10-24 16:

In [4]:
# Display the estimator's handle to its most recent training job.
sklearn_estimator.latest_training_job

<sagemaker.estimator._TrainingJob at 0x7f17dda48090>

In [24]:
# Look up the full description of the estimator's most recent training job.
latest_job_name = sklearn_estimator.latest_training_job.name
training_job = sagemaker_session.describe_training_job(job_name=latest_job_name)

In [25]:
# Pull the S3 location of the model artifacts out of the job description.
model_artifacts = training_job["ModelArtifacts"]
artifact_path = model_artifacts["S3ModelArtifacts"]

In [30]:
# Bundle the job's final accuracy metric with the hyperparameters used for the
# run and upload the result as a JSON file to the model-artifact bucket.
#
# 'FinalMetricDataList' can be absent or empty in the job description (for
# example when no metric was captured), so fail with a clear message instead
# of an opaque TypeError/IndexError from indexing into None or [].
final_metrics = training_job.get('FinalMetricDataList') or []
if not final_metrics:
    raise ValueError(
        f"Training job {training_job['TrainingJobName']} reported no final metrics; "
        "check that the training logs match the configured metric regex."
    )
accuracy_metric = final_metrics[0]  # first (and expected only) reported metric

metrics_and_hyperparams = {
    accuracy_metric['MetricName']: accuracy_metric['Value'],
    'hyperparameters': hyperparameters
}
metrics_json = json.dumps(metrics_and_hyperparams)

bucket_name = 'mlops-model-artifact-bucket'
# Prefix the key with the training-job name so each run gets its own metrics.json.
metrics_file_key = f"{training_job['TrainingJobName']}/metrics.json"

# Kept as the last expression so the S3 response is shown as the cell output.
s3_client.put_object(
    Bucket=bucket_name,
    Key=metrics_file_key,
    Body=metrics_json,
    ContentType='application/json'
)


{'ResponseMetadata': {'RequestId': 'D2KNRFX4ZTSV98PF',
  'HostId': 'GDVnYmuusQiijUIHauomVHGyeTSfPOIkUC2qmskYnuma30/VeUOliPOviWnKdDQU/hlgtEU13BdPKjxAkZUJAcn+lkg/DCHF',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'GDVnYmuusQiijUIHauomVHGyeTSfPOIkUC2qmskYnuma30/VeUOliPOviWnKdDQU/hlgtEU13BdPKjxAkZUJAcn+lkg/DCHF',
   'x-amz-request-id': 'D2KNRFX4ZTSV98PF',
   'date': 'Thu, 24 Oct 2024 20:39:13 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"cb4a3211f502c59011e449da20bc9247"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"cb4a3211f502c59011e449da20bc9247"',
 'ServerSideEncryption': 'AES256'}

In [17]:
# Re-describe the latest training job and show its final metrics
# (falls back to an empty list when the key is missing).
latest_job_description = sagemaker_session.describe_training_job(
    job_name=sklearn_estimator.latest_training_job.name
)
latest_job_description.get('FinalMetricDataList', [])

[{'MetricName': 'accuracy',
  'Value': 1.0,
  'Timestamp': datetime.datetime(2024, 10, 24, 18, 59, 36, tzinfo=tzlocal())}]

In [44]:
# Wrap the trained artifact in a SKLearn model object.
# NOTE(review): no entry_point (inference script) is supplied - confirm the
# serving container can load this artifact without one before deploying.
model = SKLearnModel(
    model_data=artifact_path,
    role=role,
    framework_version='0.23-1',
)

# Attach the metrics file to the model package. The URI is built from
# bucket_name and metrics_file_key - the exact location the metrics JSON was
# written to - instead of being typed out a second time, so the upload target
# and the registered URI cannot drift apart or be mistyped.
model_metrics = ModelMetrics(
    model_statistics=MetricsSource(
        s3_uri=f"s3://{bucket_name}/{metrics_file_key}",
        content_type='application/json'
    )
)

# Register the model in the "mlops-demo-group" model package group, declaring
# CSV input/output and the instance types allowed for inference and transform.
model.register(
    model_package_group_name="mlops-demo-group",
    content_types=['text/csv'],
    response_types=['text/csv'],
    model_metrics=model_metrics,
    inference_instances=['ml.m5.xlarge'],
    transform_instances=['ml.m5.xlarge'],
    customer_metadata_properties={"ModelType": "Random Forest"}
)

INFO:sagemaker:Created S3 bucket: sagemaker-eu-north-1-535002873576


ClientError: An error occurred (ValidationException) when calling the CreateModelPackage operation: 1 validation error detected: Value 's3:/mlops-model-artifact-bucket/sagemaker-scikit-learn-2024-10-24-16-57-28-300/metrics.json' at 'modelMetrics.modelQuality.statistics.s3Uri' failed to satisfy constraint: Member must satisfy regular expression pattern: ^(https|s3)://([^/]+)/?(.*)$

In [None]:
# Stand up a real-time endpoint for the model on a single instance.
predictor = model.deploy(
    endpoint_name='mlops-demo-endpoint',
    instance_type='ml.m5.large',
    initial_instance_count=1,
)

# Smoke-test the endpoint with one example row for the Iris dataset.
test_data = [[5.1, 3.5, 1.4, 0.2]]
predictions = predictor.predict(test_data)
print(f"Predictions: {predictions}")
