In [3]:
# Smoke test: prints a fixed string to confirm the kernel executes cells.
print('test')

test


In [4]:
# Installs the XGBoost library into the kernel's environment (%pip targets the
# running kernel, unlike a bare shell pip).
# NOTE(review): the version is not pinned, so a re-run may pull a different
# XGBoost release than the one used to train the model — consider pinning.
%pip install xgboost

[0mNote: you may need to restart the kernel to use updated packages.


In [5]:
# Import all libraries
import json
import os
import pathlib
import tarfile
import logging
import pickle
import boto3

In [6]:
# Imports necessary libraries for data manipulation, modeling, and performance evaluation
import pandas as pd
import xgboost
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score

In [12]:
# Script-wide settings: the working directory, the S3 location of the trained
# model artifact, and the local paths it is downloaded and extracted to.
base_dir = "/opt/ml/processing/"
default_bucket = 'z-tmp-tfc'

# S3 URI of the model artifact. The bucket segment is interpolated from
# default_bucket so the bucket name is written in exactly one place.
model1_s3 = (
    f"s3://{default_bucket}/artifacts/apg-heart-model/"
    "pipelines-2g88zpizgdu0-APG-Heart-Train-SBUA4HYG0h/output/model.tar.gz"
)

# Splitting "s3://<bucket>/<key...>" on "/" gives ["s3:", "", <bucket>, <key parts>...].
s3_uri_parts = model1_s3.split("/")
bucket = s3_uri_parts[2]
model1_key = "/".join(s3_uri_parts[3:])

# Local paths are all derived from base_dir (which already ends with "/"),
# so the download target and the extraction paths cannot drift apart.
model_base = f"{base_dir}model/m1"
model1_fn = f"{model_base}/model.tar.gz"

# model_path is the archive that gets extracted; it is the same file as the
# download target model1_fn.
model_path = model1_fn

In [9]:
# Creates the local directory that will receive the model archive.
# The directory is taken from model1_fn (the download target) rather than
# rebuilt from base_dir: base_dir already ends with "/", so appending
# "/model/m1" produced a doubled separator, and deriving it here guarantees
# the directory created is the one the download writes into.
pathlib.Path(model1_fn).parent.mkdir(parents=True, exist_ok=True)

In [10]:
# Download the model tar file from S3 to its local path.
# `s3` is kept as a notebook-level name because a later cell reuses it.
s3 = boto3.resource("s3")
model_bucket = s3.Bucket(bucket)
model_bucket.download_file(Key=model1_key, Filename=model1_fn)

In [13]:
# Extracts the model archive at model_path into the model_base directory.
with tarfile.open(model_path) as tar:
    if hasattr(tarfile, "data_filter"):
        # Extraction filters are supported by this Python: the "data" filter
        # refuses members that would be written outside model_base (absolute
        # paths, ".." components, escaping links).
        tar.extractall(path=model_base, filter="data")
    else:
        # NOTE(review): this Python has no extraction filters, so archive
        # members are not validated — only extract archives you trust.
        tar.extractall(path=model_base)

In [14]:
# List the model directory to check what the download and extraction produced.
%ls -l /opt/ml/processing/model/m1/

total 488
-rw-r--r-- 1 root root 131545 Oct 29 00:21 model.tar.gz
-rw-r--r-- 1 root root 362344 Oct 23 21:16 xgboost-model


In [15]:
# Make sure the local folder for the test dataset exists (no error if it
# already does; missing parent directories are created too).
test_dir = pathlib.Path("/opt/ml/processing/test")
test_dir.mkdir(exist_ok=True, parents=True)

In [16]:
# Download the test CSV from S3 to the local test folder.
# f_bucket / f_bucketpath identify the object; f_syspath is where it is saved.
f_bucket = "sagemaker-us-east-1-828238096174"
f_bucketpath = "apg-pipe-heart-p-ewf8t7lvhivm/1j55p30wb5xc/APG-Heart-Process/output/test/test.csv"
f_syspath = "/opt/ml/processing/test/test.csv"

# Reuses the `s3` resource created when the model was downloaded.
test_bucket = s3.Bucket(f_bucket)
test_bucket.download_file(Key=f_bucketpath, Filename=f_syspath)

In [17]:
# List the test folder to check that test.csv was downloaded.
%ls -l /opt/ml/processing/test/

total 8
-rw-r--r-- 1 root root 4318 Oct 29 00:26 test.csv


In [18]:
# Restore the XGBoost classifier from the extracted model file.
# NOTE(review): an earlier draft built the classifier with
# objective="multi:softmax", num_class=5; presumably load_model restores those
# settings from the file, which is why a default-constructed classifier is
# used here — confirm.
model_file = '/opt/ml/processing/model/m1/xgboost-model'
loaded_model = xgboost.XGBClassifier()
loaded_model.load_model(model_file)

In [19]:
# Read the test dataset into two independent dataframes: `df` for inference
# and `dfTest` for comparing results. header=None means the first CSV row is
# treated as data and the columns get integer labels.
test_path = "/opt/ml/processing/test/test.csv"
df, dfTest = (pd.read_csv(test_path, header=None) for _ in range(2))

In [20]:
# Split the test dataframe into the target vector and the feature matrix:
# column 0 is the label, every remaining column is a feature.
# `df` is sliced rather than mutated with drop(inplace=True), so re-running
# this cell yields the same y_test / X_test instead of dropping a further
# column on every execution. `df` itself keeps all of its columns.
y_test = df.iloc[:, 0].to_numpy()
X_test = df.iloc[:, 1:].to_numpy()

In [21]:
# Run predictions: feed the test feature matrix to the loaded classifier and
# keep the result for the evaluation cells below.
predictions = loaded_model.predict(X_test)

In [22]:
# Show the model's predictions for the test rows.
print(predictions)

[2 2 2 0 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 0 1 2 2 1 2 2 2 2 2 2 2 1 2
 0 1 0 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 2 0 2 2 2 2 2 0 1 0 2 2
 2 1 1 2 0 2 1 2 2 1 2 2 0 2 2 2 2 2]


In [23]:
# Show the true labels (column 0 of the test CSV) for comparison with the predictions.
print(y_test)

[0 2 2 1 2 2 2 2 3 2 2 2 2 2 1 2 2 2 0 2 2 1 0 3 3 2 2 1 2 2 2 2 2 3 2 0 1
 2 0 2 0 1 2 2 2 2 2 2 2 0 2 0 2 1 0 3 2 2 1 2 0 2 2 1 2 2 2 0 2 0 2 2 1 2
 2 3 2 2 3 2 2 2 2 1 2 2 3 2 2 0 2 2]


In [24]:
# Build the classification evaluation report.
# Accuracy is computed on the rounded predictions against the true labels.
acc = accuracy_score(y_test, predictions.round())

# Metric names must follow the SageMaker model-quality naming
# (https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-metrics.html);
# which metrics are reported can change with the model used.
accuracy_entry = {"value": acc, "standard_deviation": "NaN"}
report_dict = {"multi_classification_metrics": {"accuracy": accuracy_entry}}

In [25]:
# Show the assembled evaluation report.
print(report_dict)

{'multi_classification_metrics': {'accuracy': {'value': 0.6086956521739131, 'standard_deviation': 'NaN'}}}
