In [103]:
import sagemaker
from sklearn.model_selection import train_test_split
import boto3
import pandas as pd
from sagemaker.sklearn.estimator import SKLearn
import yaml

In [104]:
# Read deployment settings (IAM role, AWS profile, S3 bucket) from the
# local YAML file so nothing account-specific is hardcoded in the notebook.
with open("env-vars.yml") as config_file:
    env_vars = yaml.safe_load(config_file)

# Pull out the three settings the rest of the notebook relies on.
sagemaker_role, sagemaker_profile, bucket_name = (
    env_vars['sagemaker-role-arn'],
    env_vars['sagemaker-profile'],
    env_vars['s3-bucket-name'],
)

In [105]:
# Configure the default boto3 session FIRST so that every client/session
# created afterwards (including the SageMaker client below and any
# sagemaker.Session() built without an explicit boto session) picks up the
# credentials and region of the configured profile. Creating a client before
# this call would bind it to the implicit, profile-less default session.
boto3.setup_default_session(profile_name=sagemaker_profile)
boto3_client = boto3.client('sagemaker')

# SageMaker SDK session used for uploads and job/endpoint management.
session = sagemaker.Session()
region = session.boto_session.region_name

# Bucket that receives the train/test CSV files.
bucket = bucket_name
print(f"Using bucket {bucket}")

INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


Using bucket mobile-price


In [71]:
# Load the raw labelled dataset from the local data directory.
raw_csv_path = "data/train.csv"
train_data = pd.read_csv(raw_csv_path)

In [72]:
# Preview the first rows of the raw dataset to sanity-check the columns.
train_data.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,internal_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [73]:
# The last column of the dataset is the target; every other column is a feature.
label = train_data.columns[-1]
features = list(train_data.columns[:-1])

x, y = train_data[features], train_data[label]

# Hold out 15% of the rows for evaluation; the fixed seed keeps the split
# reproducible across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.15,
    random_state=0,
)

In [74]:
# Re-attach the label as the last column so each split can be written out as a
# single CSV file for the training job.
#
# `pd.DataFrame(existing_frame)` does not copy its input, so adding a column to
# the result can (depending on the pandas version) also modify X_train/X_test
# or emit a SettingWithCopyWarning. `assign` always returns a new frame, which
# keeps the feature-only splits untouched and makes this cell safe to re-run.
train_x = X_train.assign(**{label: y_train})
test_x = X_test.assign(**{label: y_test})

In [75]:
# Persist both splits locally (without the index column) so they can be
# uploaded to S3 for the training job.
for split_frame, csv_path in (
    (train_x, "data/mobile-price-train.csv"),
    (test_x, "data/mobile-price-test.csv"),
):
    split_frame.to_csv(csv_path, index=False)

In [76]:
# Upload the train and test CSV files under a common key prefix; each call
# returns the S3 URI of the uploaded object.
bucket_prefix = 'mobile-price-data'
train_path, test_path = (
    session.upload_data(path=local_csv, bucket=bucket, key_prefix=bucket_prefix)
    for local_csv in ('data/mobile-price-train.csv', 'data/mobile-price-test.csv')
)

In [87]:
# Hyperparameters forwarded to the training entry point script.
training_hyperparameters = {"n_estimators": 100, "random_state": 0}

# Scikit-learn training job definition: one ml.m4.xlarge spot instance running
# sagemaker_script.py. max_run caps the training time and max_wait caps the
# total time including waiting for spot capacity (both in seconds).
sklearn_estimator = SKLearn(
    entry_point="sagemaker_script.py",
    framework_version="1.0-1",
    role=sagemaker_role,
    base_job_name="mobile-price-clf-model",
    instance_type="ml.m4.xlarge",
    instance_count=1,
    hyperparameters=training_hyperparameters,
    use_spot_instances=True,
    max_run=3600,
    max_wait=7200,
)

In [88]:
# Show the S3 URIs of the uploaded train and test files, one per line.
print(train_path, test_path, sep="\n")

s3://mobile-price/mobile-price-data/mobile-price-train.csv
s3://mobile-price/mobile-price-data/mobile-price-test.csv


In [89]:
# Map each input channel name to the S3 location of its data, then launch the
# training job and block until it finishes.
input_channels = {"train": train_path, "test": test_path}
sklearn_estimator.fit(input_channels, wait=True)

Using provided s3_resource


INFO:sagemaker:Creating training-job with name: mobile-price-clf-model-2023-08-04-15-24-54-939


2023-08-04 15:24:55 Starting - Starting the training job...
2023-08-04 15:25:21 Starting - Preparing the instances for training.........
2023-08-04 15:26:33 Downloading - Downloading input data...
2023-08-04 15:27:13 Training - Downloading the training image...
2023-08-04 15:27:54 Uploading - Uploading generated training model[34m2023-08-04 15:27:47,702 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2023-08-04 15:27:47,706 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-08-04 15:27:47,716 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2023-08-04 15:27:47,961 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-08-04 15:27:47,976 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-08-04 15:27:47,993 sagemaker-training-toolkit INFO     No GPUs detected (normal if no

In [93]:
# Make sure the most recent training job has finished (without streaming its
# logs), then look up where its model artifact was stored in S3.
training_job = sklearn_estimator.latest_training_job
training_job.wait(logs="None")

job_description = session.describe_training_job(job_name=training_job.name)
artifact = job_description["ModelArtifacts"]["S3ModelArtifacts"]

print(f"Model artifact persisted at {artifact}")


2023-08-04 15:28:05 Starting - Preparing the instances for training
2023-08-04 15:28:05 Downloading - Downloading input data
2023-08-04 15:28:05 Training - Training image download completed. Training in progress.
2023-08-04 15:28:05 Uploading - Uploading generated training model
2023-08-04 15:28:05 Completed - Training job completed
Model artifact persisted at s3://sagemaker-us-east-1-882605792865/mobile-price-clf-model-2023-08-04-15-24-54-939/output/model.tar.gz


In [94]:
from sagemaker.sklearn.model import SKLearnModel
from time import gmtime, strftime

# Timestamp the model name (UTC) so repeated runs do not collide.
model_timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
model_name = 'mobile-price-clf-' + model_timestamp

# Wrap the trained artifact in a deployable scikit-learn model that reuses the
# training entry point script and framework version.
model = SKLearnModel(
    model_data=artifact,
    name=model_name,
    entry_point="sagemaker_script.py",
    framework_version="1.0-1",
    role=sagemaker_role,
)

In [96]:
# Timestamp the endpoint name (UTC) so each deployment gets a unique endpoint.
deploy_timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
endpoint_name = 'mobile-price-clf-' + deploy_timestamp
print(f"Endpoint Name = {endpoint_name}")

# Deploy the model to a single-instance real-time endpoint; the returned
# predictor is used to send inference requests.
predictor = model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.m4.xlarge",
    initial_instance_count=1,
)

Endpoint Name = mobile-price-clf-2023-08-04-23-45-24


INFO:sagemaker:Creating model with name: mobile-price-clf-2023-08-04-23-43-58
INFO:sagemaker:Creating endpoint-config with name mobile-price-clf-2023-08-04-23-45-24
INFO:sagemaker:Creating endpoint with name mobile-price-clf-2023-08-04-23-45-24


-------!

In [97]:
# Display the name of the endpoint that was just deployed.
endpoint_name

'mobile-price-clf-2023-08-04-23-45-24'

In [98]:
# Smoke-test the endpoint with the feature values of the first two held-out
# rows (label column excluded), sent as a plain nested list.
sample_rows = test_x[features][0:2].values.tolist()
sample_predictions = predictor.predict(sample_rows)
print(sample_predictions)

[3 0]


In [99]:
# Tear down the endpoint to stop incurring charges. Deleting through the
# predictor also removes the endpoint configuration that `deploy` created,
# which a bare endpoint deletion would leave behind in the account.
predictor.delete_endpoint(delete_endpoint_config=True)

INFO:sagemaker:Deleting endpoint with name: mobile-price-clf-2023-08-04-23-45-24
