In [1]:
import boto3
import os
import sagemaker
import tarfile
from sagemaker.transformer import Transformer
from sagemaker.xgboost import XGBoostModel
from sagemaker import get_execution_role
import xgboost as xgb
import pandas as pd

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


SETTINGS

In [2]:
# S3 location of the trained model archive (bucket name + object key)
model_bucket_name = 'sagemaker-eu-west-1-211125740051'
model_file_key = 'xgboost-05-2024-07-01-10-55-25-128/output/model.tar.gz'

# Full S3 URI of the trained model, built from the bucket and key above
# so the three values cannot drift apart when one of them is edited
model_data = f's3://{model_bucket_name}/{model_file_key}'

# S3 location of the CSV file used as inference input (bucket name + object key)
input_bucket_name = 'sagemaker-bucket-ds'
input_file_key = 'training-jobs/data/inference_input/iris_inference.csv'

local_directory = "01_manual_prediction"  # Local folder where downloaded files are saved

CREATE BASIC OBJECTS

In [3]:
# Low-level S3 client reused by the download cells below
s3 = boto3.client(service_name='s3')

DOWNLOAD MODEL

In [4]:
# Make sure the configured local folder exists before downloading into it
# (uses `local_directory` instead of repeating the folder name as a literal)
os.makedirs(local_directory, exist_ok=True)

# Local destination of the model archive
local_file_path = os.path.join(local_directory, 'model.tar.gz')

# Download the model archive from S3
s3.download_file(model_bucket_name, model_file_key, local_file_path)

print(f"File downloaded to {local_file_path}")

File downloaded to 01_manual_prediction/model.tar.gz


UNPACK FILE

In [5]:
# Rebuild the archive path here instead of relying on `local_file_path`:
# a later cell reassigns that name to the input CSV, so re-running this cell
# out of order would otherwise try to open the CSV as a tarball.
model_archive_path = os.path.join(local_directory, 'model.tar.gz')

# Unpack the tar.gz file into the local folder
with tarfile.open(model_archive_path, 'r:gz') as tar:
    if hasattr(tarfile, 'data_filter'):
        # Extraction filters are available: use the 'data' filter, which
        # refuses members that would be written outside the destination.
        tar.extractall(path=local_directory, filter='data')
    else:
        # Older Python without extraction filters: plain extraction
        tar.extractall(path=local_directory)

print(f"File unpacked to {local_directory}")

File unpacked to 01_manual_prediction


LOAD MODEL

In [6]:
# Path of the model file inside the local folder
# (expected to have been produced by unpacking the model archive — TODO confirm the file name)
model_path = os.path.join(local_directory, 'model.xgb')

# Create an empty Booster, then populate it from the saved model file
bst = xgb.Booster()
bst.load_model(model_path)

COPY INPUT DATA

In [7]:
# Local destination of the input file; the file name is taken from the S3 key
# so it stays in sync if `input_file_key` is changed.
# NOTE: this reassigns `local_file_path`, which previously pointed at the model archive.
local_file_path = os.path.join(local_directory, os.path.basename(input_file_key))

# Download the input file from S3
s3.download_file(input_bucket_name, input_file_key, local_file_path)

LOAD INPUT DATA

In [8]:
# Read the downloaded CSV; it has no header row, so columns get integer labels
df = pd.read_csv(filepath_or_buffer=local_file_path, header=None)

# Show the loaded rows as a sanity check
print(df)

      0    1    2    3
0   5.7  2.8  4.1  1.3
1   4.9  3.0  1.4  0.2
2   6.3  3.3  4.7  1.6
3   4.7  3.2  1.3  0.2
4   5.0  3.4  1.5  0.2
5   5.8  4.0  1.2  0.2
6   4.6  3.6  1.0  0.2
7   5.2  3.4  1.4  0.2
8   4.4  3.2  1.3  0.2
9   6.0  2.9  4.5  1.5
10  5.5  2.3  4.0  1.3
11  5.1  3.4  1.5  0.2
12  5.6  3.0  4.1  1.3
13  4.9  2.4  3.3  1.0
14  4.5  2.3  1.3  0.3
15  6.4  3.2  4.5  1.5
16  5.6  3.0  4.5  1.5
17  5.1  3.7  1.5  0.4
18  5.4  3.7  1.5  0.2
19  5.4  3.4  1.7  0.2
20  6.7  3.0  5.0  1.7
21  6.1  2.9  4.7  1.4
22  4.9  3.6  1.4  0.1
23  5.1  3.3  1.7  0.5
24  6.0  3.4  4.5  1.6
25  5.4  3.0  4.5  1.5
26  5.7  3.8  1.7  0.3
27  5.7  2.8  4.5  1.3
28  5.7  2.9  4.2  1.3
29  4.4  3.0  1.3  0.2
30  5.6  2.5  3.9  1.1
31  5.0  2.3  3.3  1.0
32  5.0  3.2  1.2  0.2


CALCULATE THE PREDICTIONS

In [9]:
# Wrap the input DataFrame in the DMatrix container that the Booster consumes
dmat = xgb.DMatrix(data=df)

# Score every input row with the loaded model
predictions = bst.predict(dmat)

# Label on the first line, predictions on the next
print("Predictions:", predictions, sep="\n")

Predictions:
[0.9136281 0.0870214 0.9136281 0.0870214 0.0870214 0.0870214 0.0870214
 0.0870214 0.0870214 0.9136281 0.9136281 0.0870214 0.9136281 0.9136281
 0.0870214 0.9136281 0.9136281 0.0870214 0.0870214 0.0870214 0.9136281
 0.9136281 0.0870214 0.0870214 0.9136281 0.9136281 0.0870214 0.9136281
 0.9136281 0.0870214 0.9136281 0.9136281 0.0870214]
