In [15]:
import sagemaker
from sklearn.model_selection import train_test_split
import boto3
import pandas as pd



# AWS region used for the boto3 session below.
REGION = "us-east-2"

# One boto3 session pinned to REGION; both the low-level SageMaker client
# (sm_boto3) and the SageMaker SDK session are derived from it.
session = boto3.Session(region_name=REGION)
sm_boto3 = session.client("sagemaker")
sagemaker_session = sagemaker.Session(boto_session=session)
# S3 bucket that receives the train/test CSV uploads later in the notebook.
bucket = 'spicemovesagemakerbucketmob'
print('Using Bucket ' + bucket)

Using Bucket spicemovesagemakerbucketmob


In [26]:
# Load the dataset from a CSV file in the working directory.
df = pd.read_csv('mob_price_clasification.csv')

# Preview the first rows.
df.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,2,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,6,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,6,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,9,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,14,1208,1212,1411,8,2,15,1,1,0,1


In [18]:
# (rows, columns) of the loaded frame.
df.shape

(2000, 21)

In [21]:
# Number of missing values in each column.
df.isnull().sum()

battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64

In [30]:
# Target: the price_range column.
y = df['price_range']

y.shape

(2000,)

In [None]:
# Features: every column except the target.
x = df.drop('price_range', axis=1)

x.shape

(2000, 20)

In [32]:
# Hold out 20% of the rows for testing; random_state=0 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=0)

In [41]:

# Name of the target column in the exported CSVs. It is appended last, and the
# training script treats the last column as the label.
label_col = "label"

# Work on explicit copies. `pd.DataFrame(X_train)` does not copy a DataFrame
# input, so adding the label column could write through to X_train / X_test
# (or raise SettingWithCopyWarning, since they are slices of `x`).
trainX = X_train.copy()
trainX[label_col] = y_train

testX = X_test.copy()
testX[label_col] = y_test

In [43]:
# Check the test frame: feature columns followed by the label column.
testX.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,label
405,1454,1,0.5,1,1,0,34,0.7,83,4,3,250,1033,3419,7,5,5,1,1,0,3
1190,1092,1,0.5,1,10,0,11,0.5,167,3,14,468,571,737,14,4,11,0,1,0,0
1132,1524,1,1.8,1,0,0,10,0.6,174,4,1,154,550,2678,16,5,13,1,0,1,2
731,1807,1,2.1,0,2,0,49,0.8,125,1,10,337,1384,1906,17,13,13,0,1,1,2
1754,1086,1,1.7,1,0,1,43,0.2,111,6,1,56,1150,3285,11,5,17,1,1,0,2


In [47]:
# Write both splits to local CSV files; index=False leaves the row index out
# of the files.
trainX.to_csv('train-V-1.csv', index = False)
testX.to_csv('test-V-1.csv', index = False)

In [49]:
# Upload the train/test CSV files to S3 so the SageMaker training job can use them.

# Key prefix under which both files are stored in the bucket.
sk_prefix = "sagemaker/mobile_price_clasification/sklearcontainer"
# The value returned by upload_data is kept and passed to fit() as the
# "train" channel.
trainpath = sagemaker_session.upload_data(
    path = "train-V-1.csv", bucket=bucket, key_prefix = sk_prefix
)

# Same for the test split; passed to fit() as the "test" channel.
testpath = sagemaker_session.upload_data(
    path = "test-V-1.csv", bucket=bucket, key_prefix = sk_prefix
)

In [50]:
# Show the values returned by the two uploads.
print(trainpath)
print(testpath)

s3://spicemovesagemakerbucketmob/sagemaker/mobile_price_clasification/sklearcontainer/train-V-1.csv
s3://spicemovesagemakerbucketmob/sagemaker/mobile_price_clasification/sklearcontainer/test-V-1.csv


In [52]:
%%writefile script.py

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score
import sklearn
import joblib
import boto3
import pathlib
from io import StringIO
import argparse
import os


def model_fn(model_dir):
    """Load the trained model for inference.

    Args:
        model_dir: Directory that contains ``model.joblib``, the file written
            by the training entry point of this script.

    Returns:
        The object deserialized from ``model.joblib``.
    """
    # The parameter used to be named `model_fn`, which left `model_dir`
    # undefined and made every call fail with NameError.
    clf = joblib.load(os.path.join(model_dir, "model.joblib"))
    return clf

def parse_args(argv=None):
    """Parse hyperparameters and data locations for the training run.

    Args:
        argv: Optional list of argument strings; ``None`` reads ``sys.argv``.

    Returns:
        argparse.Namespace with ``n_estimators``, ``random_state``,
        ``model_dir``, ``train``, ``test``, ``train_file`` and ``test_file``.
        Unrecognised arguments are ignored.
    """
    parser = argparse.ArgumentParser()

    # Hyperparameters.
    parser.add_argument("--n_estimators", type=int, default=100)
    parser.add_argument("--random_state", type=int, default=0)

    # Directories default to the SM_* environment variables. They are paths,
    # so they must be parsed as str: argparse applies `type` to string
    # defaults, and type=int made every one of them fail.
    parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR"))
    parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN"))
    parser.add_argument("--test", type=str, default=os.environ.get("SM_CHANNEL_TEST"))
    # File names are literal defaults, not names of environment variables.
    parser.add_argument("--train-file", type=str, default="train-V-1.csv")
    parser.add_argument("--test-file", type=str, default="test-V-1.csv")

    args, _ = parser.parse_known_args(argv)
    return args


if __name__ == "__main__":
    # pandas is needed to read the CSVs but is not among the script's
    # top-level imports, so it is imported here.
    import pandas as pd

    print("[INFO] Extracting arguments")
    args = parse_args()

    print("SKLearn Version: ", sklearn.__version__)
    print("joblib Version: ", joblib.__version__)

    print("[INFO] Reading data")
    print()
    train_df = pd.read_csv(os.path.join(args.train, args.train_file))
    test_df = pd.read_csv(os.path.join(args.test, args.test_file))

    # The last column of the training CSV is the label; the rest are features.
    features = list(train_df.columns)
    label = features.pop(-1)

    print("Building training and testing datasets")
    print()

    X_train = train_df[features]
    X_test = test_df[features]
    y_train = train_df[label]
    y_test = test_df[label]

    print('column order: ')
    print(features)
    print()

    print("Label column is: ", label)
    print()

    # Report the actual split ratio instead of a hard-coded (and wrong) 85/15.
    n_rows = len(train_df) + len(test_df)
    train_pct = 100.0 * len(train_df) / max(n_rows, 1)

    print("Data Shape: ")
    print()
    print("---- SHAPE OF TRAINING DATA ({:.0f}%) ----".format(train_pct))
    print(X_train.shape)
    print(y_train.shape)
    print()
    print("---- SHAPE OF TESTING DATA ({:.0f}%) ----".format(100.0 - train_pct))
    print(X_test.shape)
    print(y_test.shape)
    print()

    print("Training RandomForest Model....")
    print()
    model = RandomForestClassifier(n_estimators=args.n_estimators, random_state=args.random_state)
    model.fit(X_train, y_train)
    print()

    # Persist under the file name that model_fn loads at inference time.
    model_path = os.path.join(args.model_dir, "model.joblib")
    joblib.dump(model, model_path)
    print("model persisted at " + model_path)
    print()

    y_pred_test = model.predict(X_test)
    test_acc = accuracy_score(y_test, y_pred_test)
    test_rep = classification_report(y_test, y_pred_test)

    print()
    print("---- METRICS RESULTS FOR TESTING DATA ----")
    print()
    print("Total Rows are: ", X_test.shape[0])
    print('[TESTING] Model Accuracy is: ', test_acc)
    print('[TESTING] Testing Report: ')
    print(test_rep)
    

Writing script.py


In [69]:
from sagemaker.sklearn.estimator import SKLearn

REGION = "us-east-2"
# Version passed to the estimator as framework_version.
FRAMEWORK_VERSION = "0.23-1"

# Fresh boto3 / SageMaker sessions pinned to REGION (this rebinds the
# sagemaker_session created in the setup cell).
boto_sess = boto3.Session(region_name=REGION)
sagemaker_session = sagemaker.Session(boto_session=boto_sess)

# Estimator that uses script.py (written by the %%writefile cell) as its
# entry point. The hyperparameters correspond to the script's
# --n_estimators / --random_state arguments.
sklearn_estimator = SKLearn(
    entry_point="script.py",
    role="arn:aws:iam::966127384044:role/SageMakerExecutionRole",
    instance_count=1,
    instance_type = "ml.m5.large",
    framework_version=FRAMEWORK_VERSION,
    base_job_name = "RF-custom-sklearn",
    hyperparameters={
        "n_estimators": 100,
        "random_state": 0,
    },
    max_run = 3600,  # NOTE(review): presumably a run-time limit in seconds (1 hour) - confirm
    sagemaker_session=sagemaker_session 
)

INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


In [70]:
# Start the training job with the uploaded CSVs as the "train" and "test"
# channels (script.py reads SM_CHANNEL_TRAIN / SM_CHANNEL_TEST).
sklearn_estimator.fit({"train": trainpath, "test": testpath}, wait = True)

INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating training-job with name: RF-custom-sklearn-2025-06-19-20-58-07-840


2025-06-19 20:58:10 Starting - Starting the training job...
2025-06-19 20:58:44 Downloading - Downloading input data...
2025-06-19 20:59:09 Downloading - Downloading the training image.....2025-06-19 20:59:59,369 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
2025-06-19 20:59:59,372 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-06-19 20:59:59,410 sagemaker_sklearn_container.training INFO     Invoking user training script.
2025-06-19 20:59:59,552 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-06-19 20:59:59,563 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-06-19 20:59:59,575 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-06-19 20:59:59,584 sagemaker-training-toolkit INFO     Invoking user script
Training Env:
{
    "additional_framework_parameters": {},
    "channel_input_dirs": {
    

In [72]:
# Wait for the latest training job (logs="None": presumably no log
# streaming), then read the S3 location of the model artifact from the job
# description.
sklearn_estimator.latest_training_job.wait(logs="None")
artifact = sm_boto3.describe_training_job(
    TrainingJobName = sklearn_estimator.latest_training_job.name
)["ModelArtifacts"]["S3ModelArtifacts"]

print("model artifacts persisted at " + artifact)


2025-06-19 21:00:23 Starting - Preparing the instances for training
2025-06-19 21:00:23 Downloading - Downloading the training image
2025-06-19 21:00:23 Training - Training image download completed. Training in progress.
2025-06-19 21:00:23 Uploading - Uploading generated training model
2025-06-19 21:00:23 Completed - Training job completed
model artifacts persisted at s3://sagemaker-us-east-2-966127384044/RF-custom-sklearn-2025-06-19-20-58-07-840/output/model.tar.gz


In [None]:
from sagemaker.sklearn.model import SKLearnModel
import boto3
from sagemaker.session import Session
from time import strftime, gmtime

boto_session = boto3.Session(region_name="us-east-2")
sagemaker_session = Session(boto_session=boto_session)

# SageMaker execution role used for the hosted model.
role = "arn:aws:iam::966127384044:role/SageMakerExecutionRole"

# The model must be created from the trained artifact (model.tar.gz), which
# contains the model.joblib file that model_fn loads. The previous value
# pointed at sourcedir.tar.gz, i.e. the packaged source code, so there was no
# model to load. `artifact` is the S3ModelArtifacts URI read from the
# completed training job.
model_data = artifact

model = SKLearnModel(
    model_data=model_data,
    role=role,
    entry_point="script.py",  # provides model_fn for inference
    framework_version="0.23-1",
    sagemaker_session=sagemaker_session
)

# A timestamp suffix keeps endpoint names unique across runs.
endpoint_name = "Custom-sklearn-model-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print("EndpointName={}".format(endpoint_name))

predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m4.xlarge",
    endpoint_name=endpoint_name,
)


INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


EndpointName=Custom-sklearn-model-2025-06-19-21-27-05


INFO:sagemaker:Creating model with name: sagemaker-scikit-learn-2025-06-19-21-27-08-399
INFO:sagemaker:Creating endpoint-config with name Custom-sklearn-model-2025-06-19-21-27-05
INFO:sagemaker:Creating endpoint with name Custom-sklearn-model-2025-06-19-21-27-05


--------------

In [8]:
# Reuse the SageMaker client from the setup cell so this cell does not depend
# on `sagemaker_client` being created by a later cell.
sagemaker_client = sm_boto3

# Five most recently created models, newest first.
response = sagemaker_client.list_models(
    SortBy="CreationTime",
    SortOrder="Descending",
    MaxResults=5
)

# Loop variable is `model_summary`, not `model`, so the SKLearnModel object
# bound to `model` by the deploy cell is not overwritten.
for model_summary in response["Models"]:
    print(f"Model Name: {model_summary['ModelName']}, Created At: {model_summary['CreationTime']}")


Model Name: sagemaker-scikit-learn-2025-06-19-21-27-08-399, Created At: 2025-06-19 17:27:08.514000-04:00


In [9]:
# Name of the model to remove; take it from the listing printed above.
model_name = "sagemaker-scikit-learn-2025-06-19-21-27-08-399"

try:
    sagemaker_client.delete_model(ModelName=model_name)
except sagemaker_client.exceptions.ClientError as err:
    # Report the failure and keep the notebook running.
    print(f"Failed to delete model: {err}")
else:
    print(f"Deleted model: {model_name}")


Deleted model: sagemaker-scikit-learn-2025-06-19-21-27-08-399


In [10]:
import boto3
from botocore.exceptions import ClientError

# WARNING: this cell deletes EVERY endpoint, endpoint config and model that the
# client can list in this region, not only the ones created by this notebook.
region = "us-east-2"  # replace with your AWS region
sagemaker_client = boto3.client("sagemaker", region_name=region)


def _list_names(operation, result_key, name_key):
    """Return the names of all resources of a list_* operation, across all pages."""
    paginator = sagemaker_client.get_paginator(operation)
    return [
        item[name_key]
        for page in paginator.paginate()
        for item in page[result_key]
    ]


def _delete_each(kind, names, delete_call, name_param):
    """Delete each named resource, reporting failures without stopping."""
    for name in names:
        try:
            delete_call(**{name_param: name})
            print(f"✅ Deleted {kind}: {name}")
        except ClientError as e:
            print(f"❌ Failed to delete {kind} {name}: {e}")


# A single list_* call returns only the first page of results, so paginators
# are used to make "all" really mean all.

# 1. Delete all endpoints
print("Deleting all endpoints...")
_delete_each(
    "endpoint",
    _list_names("list_endpoints", "Endpoints", "EndpointName"),
    sagemaker_client.delete_endpoint,
    "EndpointName",
)

# 2. Delete all endpoint configs
print("\nDeleting all endpoint configurations...")
_delete_each(
    "endpoint config",
    _list_names("list_endpoint_configs", "EndpointConfigs", "EndpointConfigName"),
    sagemaker_client.delete_endpoint_config,
    "EndpointConfigName",
)

# 3. Delete all models
print("\nDeleting all models...")
_delete_each(
    "model",
    _list_names("list_models", "Models", "ModelName"),
    sagemaker_client.delete_model,
    "ModelName",
)

# 4. (Optional) Stop training jobs
print("\nStopping all in-progress training jobs...")
for page in sagemaker_client.get_paginator("list_training_jobs").paginate():
    for job in page["TrainingJobSummaries"]:
        name = job["TrainingJobName"]
        status = job["TrainingJobStatus"]
        if status in ["InProgress", "Stopping"]:
            try:
                sagemaker_client.stop_training_job(TrainingJobName=name)
                print(f"🛑 Stopped training job: {name}")
            except ClientError as e:
                print(f"❌ Failed to stop job {name}: {e}")
        else:
            print(f"ℹ️ Job already completed/stopped: {name} ({status})")


Deleting all endpoints...
❌ Failed to delete endpoint Custom-sklearn-model-2025-06-19-21-27-05: An error occurred (ValidationException) when calling the DeleteEndpoint operation: Cannot update in-progress endpoint "arn:aws:sagemaker:us-east-2:966127384044:endpoint/Custom-sklearn-model-2025-06-19-21-27-05".

Deleting all endpoint configurations...

Deleting all models...

Stopping all in-progress training jobs...
ℹ️ Job already completed/stopped: RF-custom-sklearn-2025-06-19-20-58-07-840 (Completed)
ℹ️ Job already completed/stopped: RF-custom-sklearn-2025-06-19-20-48-57-317 (Failed)


In [11]:
# Look up the current status of the endpoint.
response = sagemaker_client.describe_endpoint(EndpointName="Custom-sklearn-model-2025-06-19-21-27-05")
print("Endpoint status:", response["EndpointStatus"])


Endpoint status: Creating


In [13]:
import time

endpoint_name = "Custom-sklearn-model-2025-06-19-21-27-05"

# Settled statuses in which the endpoint is deleted.
DELETABLE_STATUSES = {"InService", "Failed", "OutOfService", "UpdateRollbackFailed"}
# Upper bound on total waiting so the cell cannot loop forever.
MAX_WAIT_SECONDS = 3600
DELETING_POLL_SECONDS = 100
IN_PROGRESS_POLL_SECONDS = 150

deadline = time.monotonic() + MAX_WAIT_SECONDS

while True:
    try:
        status = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)["EndpointStatus"]
    except sagemaker_client.exceptions.ClientError as e:
        # Once the endpoint is gone, describe_endpoint fails with a
        # ValidationException; treat that as "deleted" rather than crashing.
        if e.response.get("Error", {}).get("Code") == "ValidationException":
            print(f"Endpoint no longer exists: {e}")
            break
        raise

    print(f"Current status: {status}")
    if status in DELETABLE_STATUSES:
        sagemaker_client.delete_endpoint(EndpointName=endpoint_name)
        print("✅ Deleted endpoint.")
        break
    elif status in ["Deleting"]:
        print("Already deleting... waiting...")
        pause = DELETING_POLL_SECONDS
    else:
        print("Still in progress... waiting...")
        pause = IN_PROGRESS_POLL_SECONDS

    if time.monotonic() + pause > deadline:
        print(f"Gave up after waiting {MAX_WAIT_SECONDS} seconds; last status: {status}")
        break
    time.sleep(pause)


Current status: Creating
Still in progress... waiting...
Current status: Creating
Still in progress... waiting...
Current status: Failed
✅ Deleted endpoint.
