In [1]:
import sagemaker
from sagemaker.sklearn.estimator import SKLearn
from constants import AWS_ROLE, DATA_BUCKET_NAME, DATA_FILE_NAME, ARTIFACT_BUCKET_NAME
from sagemaker.sklearn.model import SKLearnModel
import json
from sagemaker import ModelMetrics, MetricsSource
import boto3
import pandas as pd
import numpy as np
from sagemaker.inputs import TrainingInput
from sagemaker.serializers import CSVSerializer

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/supreet/.config/sagemaker/config.yaml


In [2]:
# Default SageMaker session plus a low-level S3 client used for file transfers.
sagemaker_session = sagemaker.Session()
s3_client = boto3.client("s3")

In [3]:
# Short local aliases for the project constants used throughout the notebook.
data_bucket, artifact_bucket = DATA_BUCKET_NAME, ARTIFACT_BUCKET_NAME
role = AWS_ROLE

In [31]:
# Fetch the raw dataset from the data bucket into the working directory,
# reusing the object key as the local file name.
s3_client.download_file(Bucket=data_bucket, Key=DATA_FILE_NAME, Filename=DATA_FILE_NAME)

In [42]:
# Load the downloaded CSV, replace the `variety` label with integer category
# codes, and move that label column to the front of the frame.
iris_raw = pd.read_csv(f"./{DATA_FILE_NAME}")
label_codes = iris_raw["variety"].astype("category").cat.codes
feature_columns = [column for column in iris_raw.columns if column != "variety"]
iris = iris_raw.assign(variety=label_codes)[["variety", *feature_columns]]

In [65]:
# Shuffle once with a fixed seed, then cut the rows 70% / 20% / 10% by position.
# Positional slicing replaces `np.split(DataFrame, ...)`, which goes through the
# deprecated `DataFrame.swapaxes` and emits a FutureWarning on recent pandas.
iris_shuffled = iris.sample(frac=1, random_state=1729)
train_end = int(0.7 * len(iris))
validation_end = int(0.9 * len(iris))

train_data = iris_shuffled.iloc[:train_end]
validation_data = iris_shuffled.iloc[train_end:validation_end]
test_data = iris_shuffled.iloc[validation_end:]

# Every row must land in exactly one split.
assert len(train_data) + len(validation_data) + len(test_data) == len(iris)

# Written without header or index; the label is the first column of each file.
train_data.to_csv("train.csv", header=False, index=False)
validation_data.to_csv("validation.csv", header=False, index=False)
test_data.to_csv("test.csv", header=False, index=False)

  return bound(*args, **kwds)


In [44]:
# Push the training and validation splits to the data bucket, using the local
# file name as the object key.
for split_file in ("train.csv", "validation.csv"):
    s3_client.upload_file(split_file, data_bucket, split_file)

In [54]:
# Training-job input channels pointing at the uploaded CSV splits.
train_input = TrainingInput(
    s3_data=f"s3://{data_bucket}/train.csv",
    content_type="text/csv",
)
validation_input = TrainingInput(
    s3_data=f"s3://{data_bucket}/validation.csv",
    content_type="text/csv",
)

In [50]:
# Number of distinct label codes; passed to XGBoost as `num_class`.
num_classes = iris["variety"].nunique()

In [46]:
# Resolve the built-in XGBoost image URI for the session's region.
container = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=sagemaker_session.boto_region_name,
    version="1.0-1",
)

INFO:sagemaker.image_uris:Defaulting to only available Python version: py3
INFO:sagemaker.image_uris:Defaulting to only supported image scope: cpu.


In [55]:
# Estimator for the built-in XGBoost image; model artifacts are written to the
# artifact bucket.
xgb = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    output_path=f"s3://{artifact_bucket}/",
    sagemaker_session=sagemaker_session,
)

# Multi-class objective that outputs the predicted class code directly.
xgb_hyperparameters = {
    "max_depth": 5,
    "eta": 0.2,
    "objective": "multi:softmax",
    "num_class": num_classes,
    "num_round": 100,
    "verbosity": 1,
}
xgb.set_hyperparameters(**xgb_hyperparameters)

# Launch the training job with the train and validation channels.
training_channels = {"train": train_input, "validation": validation_input}
xgb.fit(training_channels)

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2024-10-27-16-02-45-080


2024-10-27 16:02:47 Starting - Starting the training job...
2024-10-27 16:03:21 Downloading - Downloading input data...
2024-10-27 16:03:41 Downloading - Downloading the training image...
2024-10-27 16:04:22 Training - Training image download completed. Training in progress...[2024-10-27 16:04:33.624 ip-10-0-195-99.eu-north-1.compute.internal:7 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None
INFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training
INFO:sagemaker-containers:Failed to parse hyperparameter objective value multi:softmax to Json.
Returning the value itself
INFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)
INFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode
INFO:root:Determined delimiter of CSV input is ','
INFO:root:Determined delimiter of CSV input is ','
INFO:root:Determined delimiter of CSV input is ','
INFO:root:Determined delimiter of CSV input is ','
INFO:root:Single node training.

In [68]:
# Host the trained model on a real-time endpoint; requests are serialized as CSV.
csv_serializer = CSVSerializer()
predictor = xgb.deploy(
    instance_type="ml.m5.large",
    initial_instance_count=1,
    serializer=csv_serializer,
)

INFO:sagemaker:Creating model with name: sagemaker-xgboost-2024-10-27-16-49-00-837
INFO:sagemaker:Creating endpoint-config with name sagemaker-xgboost-2024-10-27-16-49-00-837
INFO:sagemaker:Creating endpoint with name sagemaker-xgboost-2024-10-27-16-49-00-837


------!

In [96]:
# Name of the hosted endpoint. `Predictor.endpoint` is deprecated in SageMaker
# SDK v2 (it emits the warning captured in this cell's output); use
# `endpoint_name` instead.
predictor.endpoint_name

See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


'sagemaker-xgboost-2024-10-27-16-49-00-837'

In [75]:
def predict(data, rows=500):
    """Run batched inference against the module-level `predictor` endpoint.

    `data` is split row-wise into `int(n_rows / rows + 1)` chunks with
    `np.array_split`, each chunk is sent to the endpoint, and the decoded text
    responses are parsed into one flat list.

    The response separator differs between container versions, so each chunk
    is parsed on its own: a response containing newlines is treated as one
    prediction per line, otherwise it is treated as comma-separated (the
    format this notebook's endpoint returns — the evaluation cell parses it
    with `split(",")`). Parsing per chunk also avoids fusing the last value of
    one chunk with the first value of the next when responses have no trailing
    separator.

    Args:
        data: 2-D array-like of feature rows (must expose `.shape`).
        rows: Approximate maximum number of rows per request.

    Returns:
        list[str]: One prediction string per input row, in input order.
    """
    chunk_count = int(data.shape[0] / float(rows) + 1)
    predictions = []
    for chunk in np.array_split(data, chunk_count):
        if len(chunk) == 0:
            # array_split can yield empty chunks; do not send empty requests.
            continue
        response = predictor.predict(chunk).decode("utf-8")
        if "\n" in response:
            pieces = response.splitlines()
        else:
            pieces = response.split(",")
        # Drop blanks left behind by trailing separators.
        predictions.extend(piece.strip() for piece in pieces if piece.strip())
    return predictions

In [76]:
# Drop the first (label) column and score the remaining feature columns.
test_features = test_data.to_numpy()[:, 1:]
predictions = predict(test_features)

In [79]:
# Display the feature matrix sent to the endpoint (every column except the first).
test_data.to_numpy()[:, 1:]

array([[5.1, 3.8, 1.9, 0.4],
       [6. , 2.7, 5.1, 1.6],
       [5. , 2.3, 3.3, 1. ],
       [6.4, 2.7, 5.3, 1.9],
       [6.1, 3. , 4.6, 1.4],
       [5. , 3.2, 1.2, 0.2],
       [5.2, 2.7, 3.9, 1.4],
       [5.5, 2.5, 4. , 1.3],
       [5.9, 3. , 5.1, 1.8],
       [5.8, 2.7, 4.1, 1. ],
       [6.2, 2.8, 4.8, 1.8],
       [5.1, 3.7, 1.5, 0.4],
       [4.9, 3.6, 1.4, 0.1],
       [7.1, 3. , 5.9, 2.1],
       [6.9, 3.1, 5.1, 2.3]])

In [92]:
from sklearn.metrics import accuracy_score

# Reload the held-out split from disk; the file was written without a header row.
test_data = pd.read_csv("test.csv", header=None)
y_test = test_data.iloc[:, 0]
X_test = test_data.iloc[:, 1:]

# Serialize the feature columns to CSV text for the endpoint request.
X_test_csv = X_test.to_csv(header=False, index=False)

# The endpoint answers with comma-separated float strings; convert them to
# integer class codes.
raw_response = predictor.predict(X_test_csv).decode("utf-8")
predictions = [int(float(token)) for token in raw_response.split(",")]


In [94]:
# Fraction of held-out rows whose predicted class code matches the label.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 93.33%


# Extra code below: train a scikit-learn model and register it in the SageMaker Model Registry

In [18]:
# Hyperparameters forwarded to model_train.py. `hyperparameters` was not defined
# anywhere in the notebook, so this cell raised NameError on a fresh kernel.
# NOTE(review): values reconstructed from the recorded training-job description
# ('max-depth': 10, 'n-estimators': 100) — confirm the original value types.
hyperparameters = {
    'max-depth': 10,
    'n-estimators': 100,
}

# Script-mode scikit-learn estimator.
# NOTE(review): the S3 locations below are hard-coded, while the XGBoost cells
# use DATA_BUCKET_NAME / ARTIFACT_BUCKET_NAME from `constants` — confirm they
# refer to the same buckets and consider reusing the constants.
sklearn_estimator = SKLearn(
    entry_point='model_train.py',
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    framework_version='0.23-1',
    py_version='py3',
    hyperparameters=hyperparameters,
    output_path='s3://mlops-model-artifact-bucket/'
)

# Launch the training job
sklearn_estimator.fit({'train': 's3://mlops-bucket-files/iris.csv'})

INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2024-10-27-11-17-46-591


2024-10-27 11:17:50 Starting - Starting the training job...
2024-10-27 11:18:23 Downloading - Downloading input data...
2024-10-27 11:18:43 Downloading - Downloading the training image...
2024-10-27 11:19:19 Training - Training image download completed. Training in progress..2024-10-27 11:19:26,100 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
2024-10-27 11:19:26,102 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2024-10-27 11:19:26,141 sagemaker_sklearn_container.training INFO     Invoking user training script.
2024-10-27 11:19:26,293 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2024-10-27 11:19:26,305 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2024-10-27 11:19:26,317 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2024-10-27 11:19:26,325 sagemaker-training-toolkit INFO     Invoking user script
Trai

In [4]:
# Display the handle of the estimator's most recent training job.
sklearn_estimator.latest_training_job

<sagemaker.estimator._TrainingJob at 0x7f17dda48090>

In [19]:
# Fetch the full description of the latest scikit-learn training job.
latest_job_name = sklearn_estimator.latest_training_job.name
training_job = sagemaker_session.describe_training_job(job_name=latest_job_name)

In [25]:
# Inspect the training-job description returned by describe_training_job.
training_job

{'TrainingJobName': 'sagemaker-scikit-learn-2024-10-27-11-17-46-591',
 'TrainingJobArn': 'arn:aws:sagemaker:eu-north-1:535002873576:training-job/sagemaker-scikit-learn-2024-10-27-11-17-46-591',
 'ModelArtifacts': {'S3ModelArtifacts': 's3://mlops-model-artifact-bucket/sagemaker-scikit-learn-2024-10-27-11-17-46-591/output/model.tar.gz'},
 'TrainingJobStatus': 'Completed',
 'SecondaryStatus': 'Completed',
 'HyperParameters': {'max-depth': '"10"',
  'n-estimators': '"100"',
  'sagemaker_container_log_level': '20',
  'sagemaker_job_name': '"sagemaker-scikit-learn-2024-10-27-11-17-46-591"',
  'sagemaker_program': '"model_train.py"',
  'sagemaker_region': '"eu-north-1"',
  'sagemaker_submit_directory': '"s3://mlops-model-artifact-bucket/sagemaker-scikit-learn-2024-10-27-11-17-46-591/source/sourcedir.tar.gz"'},
 'AlgorithmSpecification': {'TrainingImage': '662702820516.dkr.ecr.eu-north-1.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3',
  'TrainingInputMode': 'File',
  'MetricDefinitions':

In [20]:
# S3 URI of the model archive produced by the training job, as reported in its description.
artifact_path = training_job["ModelArtifacts"]["S3ModelArtifacts"]

In [21]:
# Persist the first final metric of the training job, together with the
# hyperparameters, as a JSON object stored under the job's S3 prefix.
final_metrics = training_job.get('FinalMetricDataList') or []
if not final_metrics:
    # Without this guard a job that emitted no metrics fails with an opaque
    # TypeError/IndexError on the subscript below.
    raise ValueError(
        f"Training job {training_job['TrainingJobName']} reported no final metrics"
    )

accuracy_metric = final_metrics[0]
metrics_and_hyperparams = {
    accuracy_metric['MetricName']: accuracy_metric['Value'],
    'hyperparameters': hyperparameters
}
metrics_json = json.dumps(metrics_and_hyperparams)

# Object location: <bucket>/<training job name>/metrics.json
bucket_name = 'mlops-model-artifact-bucket'
metrics_file_key = f"{training_job['TrainingJobName']}/metrics.json"

s3_client.put_object(
    Bucket=bucket_name,
    Key=metrics_file_key,
    Body=metrics_json,
    ContentType='application/json'
)


{'ResponseMetadata': {'RequestId': 'YWMN614EVP51NM89',
  'HostId': 'QqOjc5izqHZnr/BWEJBL9ScxI69/560gXDV5/2yvEo3b4Ea4gnkhQ631FXo1EiJNEfxHIdqeZbjePtmtD1/4Jdi2oH/BSsKlGAUjebCONPY=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'QqOjc5izqHZnr/BWEJBL9ScxI69/560gXDV5/2yvEo3b4Ea4gnkhQ631FXo1EiJNEfxHIdqeZbjePtmtD1/4Jdi2oH/BSsKlGAUjebCONPY=',
   'x-amz-request-id': 'YWMN614EVP51NM89',
   'date': 'Sun, 27 Oct 2024 11:20:50 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"b9863fad01c9be38806317056722173d"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"b9863fad01c9be38806317056722173d"',
 'ServerSideEncryption': 'AES256'}

In [22]:
# Re-query the job and display its final metrics (an empty list if none were reported).
sagemaker_session.describe_training_job(job_name=sklearn_estimator.latest_training_job.name).get('FinalMetricDataList',[])

[{'MetricName': 'accuracy',
  'Value': 1.0,
  'Timestamp': datetime.datetime(2024, 10, 27, 12, 19, 27, tzinfo=tzlocal())}]

In [23]:
# Wrap the trained artifact in a deployable scikit-learn model.
# The scikit-learn serving container loads the model through the `model_fn`
# hook of an entry-point script. The model was previously built without one,
# and the recorded deployment failed its ping health check.
# NOTE(review): assumes model_train.py (the training script) defines
# `model_fn` — confirm, or point `entry_point` at a dedicated inference script.
model = SKLearnModel(
    model_data=artifact_path,
    role=role,
    entry_point='model_train.py',
    framework_version='0.23-1',
)

# Attach the metrics JSON uploaded for this training job to the model package.
model_metrics = ModelMetrics(
    model_statistics=MetricsSource(
        s3_uri=f"s3://{bucket_name}/{metrics_file_key}",
        content_type='application/json'
    )
)

# Register a new model package version in the "mlops-demo-group" group.
model.register(
    model_package_group_name="mlops-demo-group",
    content_types=['text/csv'],
    response_types=['text/csv'],
    model_metrics=model_metrics,
    inference_instances=['ml.m5.large'],
    transform_instances=['ml.m5.large'],
    customer_metadata_properties={"ModelType": "Random Forest"}
)

<sagemaker.model.ModelPackage at 0x7fafe7dffc10>

In [26]:
# Deploy the model to an endpoint
# Host the registered model on an endpoint named after the training job.
endpoint_name = f"{training_job['TrainingJobName']}-endpoint"
predictor = model.deploy(
    endpoint_name=endpoint_name,
    instance_type='ml.m5.large',
    initial_instance_count=1,
)

# Smoke-test the endpoint with a single four-feature Iris sample.
test_data = [[5.1, 3.5, 1.4, 0.2]]
predictions = predictor.predict(test_data)
print(f"Predictions: {predictions}")


INFO:sagemaker:Creating model with name: sagemaker-scikit-learn-2024-10-27-11-37-42-955
INFO:sagemaker:Creating endpoint-config with name sagemaker-scikit-learn-2024-10-27-11-17-46-591-endpoint
INFO:sagemaker:Creating endpoint with name sagemaker-scikit-learn-2024-10-27-11-17-46-591-endpoint


--------------------------------------------*

ERROR:sagemaker:Please check the troubleshooting guide for common errors: https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-python-sdk-troubleshooting.html#sagemaker-python-sdk-troubleshooting-create-endpoint


UnexpectedStatusException: Error hosting endpoint sagemaker-scikit-learn-2024-10-27-11-17-46-591-endpoint: Failed. Reason: The primary container for production variant AllTraffic did not pass the ping health check. Please check CloudWatch logs for this endpoint.. Try changing the instance type or reference the troubleshooting page https://docs.aws.amazon.com/sagemaker/latest/dg/async-inference-troubleshooting.html