ENSURE THE SAME VERSION OF SCIKIT-LEARN

In [None]:
pip install scikit-learn==1.2.1

IMPORT LIBRARIES

In [11]:
import boto3
import os
import sagemaker
import tarfile
from sagemaker.transformer import Transformer
from sagemaker import get_execution_role
import pandas as pd
import joblib

SETTINGS

In [7]:
# Where the trained model archive lives in S3 (bucket + object key)
model_bucket_name = "sagemaker-eu-west-1-211125740051"
model_file_key = "trainin-job-simple-03-2024-06-27-07-46-27-613/output/model.tar.gz"

# Where the inference input CSV lives in S3 (bucket + object key)
input_bucket_name = "sagemaker-bucket-ds"
input_file_key = "training-jobs/data/inference_input/iris_inference.csv"

# Local working folder that receives every downloaded/unpacked file
local_directory = "06_manual_prediction"

CREATE BASIC OBJECTS

In [3]:
# Low-level S3 client shared by the download cells below
s3 = boto3.client(service_name="s3")

DOWNLOAD MODEL

In [8]:
# Make sure the local working folder exists (no error if it already does)
os.makedirs(local_directory, exist_ok=True)

# Target path for the model archive inside the working folder
local_file_path = os.path.join(local_directory, "model.tar.gz")

# Fetch the archive from the model bucket
s3.download_file(
    Bucket=model_bucket_name,
    Key=model_file_key,
    Filename=local_file_path,
)

print(f"File downloaded to {local_file_path}")

File downloaded to 06_manual_prediction/model.tar.gz


UNPACK FILE

In [9]:
# Unpack the tar.gz file into the working folder.
#
# The archive was downloaded from S3, so limit what extraction is allowed to
# do: the "data" extraction filter refuses absolute paths, links that point
# outside the destination, and device files. The `filter` argument is only
# accepted by interpreters that expose `tarfile.data_filter` (Python 3.12+
# and the security backports), so older interpreters keep the legacy call.
with tarfile.open(local_file_path, 'r:gz') as tar:
    if hasattr(tarfile, 'data_filter'):
        tar.extractall(path=local_directory, filter='data')
    else:
        # NOTE(review): no extraction filter available here; only unpack
        # archives from a trusted source on this interpreter.
        tar.extractall(path=local_directory)

print(f"File unpacked to {local_directory}")

File unpacked to 06_manual_prediction


LOAD MODEL

In [13]:
# Path of the serialized model inside the working folder
# (expected to have been unpacked from model.tar.gz — confirm the file name)
model_path = os.path.join(local_directory, "model.joblib")

# Deserialize the model.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts that come from a trusted source.
model = joblib.load(filename=model_path)

See the scikit-learn notes on the security and maintainability limitations of persisted models: https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


COPY INPUT DATA

In [14]:
# Destination for the inference CSV inside the working folder.
# Note: this rebinds local_file_path, which previously held the model archive path.
local_file_path = os.path.join(local_directory, "iris_inference.csv")

# Fetch the CSV from the input bucket
s3.download_file(
    Bucket=input_bucket_name,
    Key=input_file_key,
    Filename=local_file_path,
)

LOAD INPUT DATA

In [15]:
# Read the downloaded CSV; header=None tells pandas the first row is data,
# so the columns get integer labels instead of names
df = pd.read_csv(filepath_or_buffer=local_file_path, header=None)

# Show the loaded frame as a sanity check
print(df)

      0    1    2    3
0   5.7  2.8  4.1  1.3
1   4.9  3.0  1.4  0.2
2   6.3  3.3  4.7  1.6
3   4.7  3.2  1.3  0.2
4   5.0  3.4  1.5  0.2
5   5.8  4.0  1.2  0.2
6   4.6  3.6  1.0  0.2
7   5.2  3.4  1.4  0.2
8   4.4  3.2  1.3  0.2
9   6.0  2.9  4.5  1.5
10  5.5  2.3  4.0  1.3
11  5.1  3.4  1.5  0.2
12  5.6  3.0  4.1  1.3
13  4.9  2.4  3.3  1.0
14  4.5  2.3  1.3  0.3
15  6.4  3.2  4.5  1.5
16  5.6  3.0  4.5  1.5
17  5.1  3.7  1.5  0.4
18  5.4  3.7  1.5  0.2
19  5.4  3.4  1.7  0.2
20  6.7  3.0  5.0  1.7
21  6.1  2.9  4.7  1.4
22  4.9  3.6  1.4  0.1
23  5.1  3.3  1.7  0.5
24  6.0  3.4  4.5  1.6
25  5.4  3.0  4.5  1.5
26  5.7  3.8  1.7  0.3
27  5.7  2.8  4.5  1.3
28  5.7  2.9  4.2  1.3
29  4.4  3.0  1.3  0.2
30  5.6  2.5  3.9  1.1
31  5.0  2.3  3.3  1.0
32  5.0  3.2  1.2  0.2


CALCULATE THE PREDICTIONS

In [18]:
# Score every input row; predict_proba yields one row of probabilities per sample
predictions = model.predict_proba(df)

# Emit the label line followed by the probability array
print("Predictions:", predictions, sep="\n")

Predictions:
[[0.04495852 0.95504148]
 [0.95079701 0.04920299]
 [0.0144583  0.9855417 ]
 [0.96462525 0.03537475]
 [0.95187472 0.04812528]
 [0.97549965 0.02450035]
 [0.98378045 0.01621955]
 [0.95767395 0.04232605]
 [0.96698484 0.03301516]
 [0.01901776 0.98098224]
 [0.04258326 0.95741674]
 [0.9507715  0.0492285 ]
 [0.05129167 0.94870833]
 [0.19452197 0.80547803]
 [0.94077574 0.05922426]
 [0.02050123 0.97949877]
 [0.02208394 0.97791606]
 [0.95204003 0.04795997]
 [0.95526579 0.04473421]
 [0.92588066 0.07411934]
 [0.00601314 0.99398686]
 [0.01393909 0.98606091]
 [0.96699195 0.03300805]
 [0.91072398 0.08927602]
 [0.02345625 0.97654375]
 [0.02313677 0.97686323]
 [0.93153969 0.06846031]
 [0.02221484 0.97778516]
 [0.03989334 0.96010666]
 [0.96312322 0.03687678]
 [0.06324702 0.93675298]
 [0.18212384 0.81787616]
 [0.96821302 0.03178698]]
