In [62]:
import sagemaker
from sklearn.model_selection import train_test_split
import boto3
import pandas as pd

sm_boto3 = boto3.client("sagemaker")
sess = sagemaker.Session()
region = sess.boto_session.region_name
bucket = 'test-bucket-sagemaker1'
print("Using bucket " + bucket)

Using bucket test-bucket-sagemaker1


In [63]:
df = pd.read_csv('gabung.csv', sep = ';')

In [64]:
df.head()

Unnamed: 0,label,acc_x_avg,acc_y_avg,acc_z_avg,gyro_x_avg,gyro_y_avg,gyro_z_avg,acc_x_max,acc_y_max,acc_z_max,...,acc_z_min,gyro_x_min,gyro_y_min,gyro_z_min,acc_x_stdev,acc_y_stdev,acc_z_stdev,gyro_x_stdev,gyro_y_stdev,gyro_z_stdev
0,0,3.74,-10.05,-1.11,-12.91,-6.17,109.61,11.5,0.58,5.79,...,-10.49,-57.71,-156.36,-198.35,4.93,5.7,3.91,37.25,73.38,200.26
1,0,-0.36,-11.82,-0.94,0.15,13.21,-98.9,16.23,-5.94,7.99,...,-6.26,-31.61,-214.63,-188.46,5.86,2.5,2.62,18.37,75.87,42.75
2,0,3.51,-10.22,-1.43,-9.29,-20.64,89.14,13.61,-0.08,5.01,...,-13.74,-67.03,-210.98,-197.78,5.37,5.05,3.9,39.03,85.05,192.06
3,0,0.48,-11.45,-1.03,2.04,30.77,-94.57,11.87,-6.23,9.9,...,-7.75,-29.04,-78.07,-185.17,4.72,2.14,2.85,17.41,49.28,39.85
4,0,3.06,-10.33,-1.32,-13.62,-17.4,90.27,12.95,-3.08,4.44,...,-11.38,-60.8,-184.22,-200.72,5.78,4.62,3.93,31.07,104.45,199.75


In [65]:
# Import train_test_split function
from sklearn.preprocessing import StandardScaler
# Pembagian dataframe label dan feature
features = list(df.columns)
label = features.pop(0)
y = df[label]
X = df[features]
print(f"Data X = {X.shape}")
print(f"Data y = {y.shape}")
# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

Data X = (153, 24)
Data y = (153,)


In [51]:
# Normalisasi Data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [66]:
testX = pd.DataFrame(X_test)
trainX = pd.DataFrame(X_train)
testX[label] = y_test
trainX[label] = y_train

In [67]:
trainX.to_csv("train-V-1.csv", index = False)
testX.to_csv("test-V-1.csv", index = False)

In [68]:
sk_prefix = "sagemaker/tugasakhir/sklearncontainer"
trainpath = sess.upload_data(
    path="train-V-1.csv", bucket=bucket, key_prefix=sk_prefix
)
testpath = sess.upload_data(
    path="test-V-1.csv", bucket=bucket, key_prefix=sk_prefix
)

In [70]:
%%writefile script.py

from sklearn import svm
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score
import sklearn
import joblib
import boto3
import pathlib
from io import StringIO
import argparse
import joblib
import os
import numpy as np
import pandas as pd

def model_fn(model_dir):
  clf = joblib.load(os.path.join(model_dir, "model.joblib"))
  return clf

if __name__ == "__main__":

  print("[INFO] Extracting arguments")
  parser = argparse.ArgumentParser()

  parser.add_argument("--kernel", type=str, default="rbf")
  parser.add_argument("--degree", type=int, default=3)
  parser.add_argument("--coef0", type=float, default=0.0)

  parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR"))
  parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN"))
  parser.add_argument("--test", type=str, default=os.environ.get("SM_CHANNEL_TEST"))
  parser.add_argument("--train-file", type=str, default="train-V-1.csv")
  parser.add_argument("--test-file", type=str, default="test-V-1.csv")

  args, _ = parser.parse_known_args()

  print("SKLearn version: ", sklearn.__version__)
  print("Joblib version: ", joblib.__version__)
 
  print()
  train_df = pd.read_csv(os.path.join(args.train, args.train_file))
  test_df = pd.read_csv(os.path.join(args.test, args.test_file))

  features = list(train_df.columns)
  label = features.pop(-1)

  print("Building training and testing datasets")
  print()
  X_train = train_df[features]
  X_test = test_df[features]
  y_train = train_df[label]
  y_test = test_df[label]

  print("Training SVM model")
  print()
  model = svm.SVC(kernel=args.kernel, degree=args.degree, coef0=args.coef0) 
  model.fit(X_train, y_train)
  print()

  model_path = os.path.join(args.model_dir, "model.joblib")
  joblib.dump(model, model_path)
  print("Model path: " + model_path)
  print()

  y_pred_test = model.predict(X_test)
  test_acc = accuracy_score(y_test, y_pred_test)
  test_rep = classification_report(y_test, y_pred_test)

  print()
  print("Testing accuracy: ", test_acc)
  print("Testing report: ")
  print(test_rep)

Overwriting script.py


In [71]:
from sagemaker.sklearn.estimator import SKLearn

FRAMEWORK_VERSION = "0.23-1"

sklearn_estimator = SKLearn(
    entry_point="script.py",
    role="arn:aws:iam::194936698984:role/service-role/AmazonSageMaker-ExecutionRole-20240226T134696",
    instance_count=1,
    instance_type="ml.m5.xlarge",
    framework_version=FRAMEWORK_VERSION,
    base_job_name="TestTA",
    hyperparameters={
        "kernel": "poly",
        "degree": 2,
        "coef0": 0.88423,
    },
    use_spot_instances = True,
    max_wait = 7200,
    max_run = 3600
)

In [72]:
sklearn_estimator.fit({"train": trainpath, "test": testpath}, wait=True)

INFO:sagemaker:Creating training-job with name: TestTA-2024-03-27-20-40-51-106


2024-03-27 20:41:08 Starting - Starting the training job...
2024-03-27 20:41:23 Starting - Preparing the instances for training...
2024-03-27 20:42:07 Downloading - Downloading input data...
2024-03-27 20:42:37 Downloading - Downloading the training image..2024-03-27 20:42:58,112 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
2024-03-27 20:42:58,115 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2024-03-27 20:42:58,154 sagemaker_sklearn_container.training INFO     Invoking user training script.
2024-03-27 20:42:58,307 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2024-03-27 20:42:58,319 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2024-03-27 20:42:58,331 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2024-03-27 20:42:58,341 sagemaker-training-toolkit INFO     Invoking user script
Training Env:
{
    "ad

In [78]:
sklearn_estimator.latest_training_job.wait(logs="None")
artifact = sm_boto3.describe_training_job(
    TrainingJobName=sklearn_estimator.latest_training_job.name
)["ModelArtifacts"]["S3ModelArtifacts"]

print("Model artifact persisted at " + artifact)


2024-03-27 20:43:13 Starting - Preparing the instances for training
2024-03-27 20:43:13 Downloading - Downloading the training image
2024-03-27 20:43:13 Training - Training image download completed. Training in progress.
2024-03-27 20:43:13 Uploading - Uploading generated training model
2024-03-27 20:43:13 Completed - Training job completed
Model artifact persisted at s3://sagemaker-ap-southeast-2-194936698984/TestTA-2024-03-27-20-40-51-106/output/model.tar.gz


In [79]:
from sagemaker.sklearn.model import SKLearnModel
from time import gmtime, strftime

model_name = "Custom-model-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
model = SKLearnModel(
    name=model_name,
    model_data=artifact,
    role="arn:aws:iam::194936698984:role/service-role/AmazonSageMaker-ExecutionRole-20240226T134696",
    entry_point="script.py",
    framework_version=FRAMEWORK_VERSION,
)

In [80]:
model

<sagemaker.sklearn.model.SKLearnModel at 0x2bfede0b400>

In [81]:
endpoint_name = "Custom-model-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print("EndpointName={}".format(endpoint_name))

predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m4.xlarge",
    endpoint_name=endpoint_name,
)

EndpointName=Custom-model-2024-03-27-20-50-19


INFO:sagemaker:Creating model with name: Custom-model-2024-03-27-20-50-13
INFO:sagemaker:Creating endpoint-config with name Custom-model-2024-03-27-20-50-19
INFO:sagemaker:Creating endpoint with name Custom-model-2024-03-27-20-50-19


-----!

In [82]:
testX[features][0:2].values.tolist()

[[4.46,
  -9.39,
  -0.85,
  -24.84,
  -17.81,
  131.27,
  12.53,
  0.71,
  5.71,
  28.05,
  166.85,
  376.41,
  -6.1,
  -17.69,
  -10.3,
  -66.05,
  -251.25,
  -183.23,
  5.01,
  4.59,
  3.7,
  27.81,
  101.1,
  199.86],
 [1.67,
  -10.83,
  -1.75,
  3.82,
  29.94,
  -96.3,
  29.46,
  1.18,
  1.1,
  41.89,
  126.09,
  36.24,
  -14.6,
  -18.55,
  -7.58,
  -20.45,
  -267.82,
  -272.08,
  10.69,
  4.89,
  2.14,
  17.3,
  86.73,
  99.92]]

In [83]:
print(predictor.predict(testX[features][0:2].values.tolist()))

[0 2]


In [84]:
print((testX[label][0:2].values.tolist()))

[0, 2]


In [85]:
sm_boto3.delete_endpoint(EndpointName=endpoint_name)

{'ResponseMetadata': {'RequestId': 'cccc7a04-3f88-4809-b1f4-80b7484eeeb7',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'cccc7a04-3f88-4809-b1f4-80b7484eeeb7',
   'content-type': 'application/x-amz-json-1.1',
   'date': 'Wed, 27 Mar 2024 20:57:28 GMT',
   'content-length': '0'},
  'RetryAttempts': 0}}