In [90]:
import sagemaker
import boto3

In [91]:
# High-level SageMaker session; its underlying boto session supplies the region.
sess = sagemaker.Session()
region = sess.boto_session.region_name

# Low-level SageMaker API client, used later to describe the training job.
sm_boto3 = boto3.client("sagemaker")

# S3 bucket that receives the uploaded train/test CSV files.
bucket = 'retocreditriskkgv'
print("Usando bucket:", bucket)

Usando bucket: retocreditriskkgv


In [92]:
# Key prefix under which both CSV files are stored in the bucket.
sk_prefix = "sagemaker/reto_credit_risk/sklearncontainer"

# Upload the train split first, then the test split; each call returns the
# S3 location that is later handed to the estimator as an input channel.
trainpath, testpath = (
    sess.upload_data(path=local_csv, bucket=bucket, key_prefix=sk_prefix)
    for local_csv in (
        "data/toTrain/train-V-1.csv",
        "data/toTrain/test-V-1.csv",
    )
)

In [93]:
%%writefile script.py

import argparse

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score
import sklearn
import joblib
import os
import numpy as np
import pandas as pd

def model_fn(model_dir):
    """Load and return the classifier stored as ``model.joblib`` in ``model_dir``.

    Args:
        model_dir: Directory that contains the ``model.joblib`` file written
            by the training section of this script.

    Returns:
        The object deserialized by ``joblib.load``.
    """
    return joblib.load(os.path.join(model_dir, "model.joblib"))

def parse_gamma(value):
    """Convert the ``--gamma`` command-line string into a value ``SVC`` accepts.

    Command-line arguments always arrive as strings, but ``SVC`` only accepts
    the keywords ``"scale"``/``"auto"`` or a real number for ``gamma``.
    Passing the raw string (e.g. ``"0.1"``) straight through would make
    ``SVC.fit`` fail, so numeric values are converted to ``float`` here.

    Args:
        value: Raw argument text (or the argparse string default).

    Returns:
        ``"scale"`` or ``"auto"`` unchanged, otherwise the value as ``float``.

    Raises:
        argparse.ArgumentTypeError: If the text is neither a known keyword
            nor parseable as a float.
    """
    text = str(value).strip()
    if text in ("scale", "auto"):
        return text
    try:
        return float(text)
    except ValueError:
        raise argparse.ArgumentTypeError(
            "gamma must be 'scale', 'auto' or a number, got %r" % (value,)
        )


if __name__ == "__main__":
    print("[INFO] Extracting arguments")
    parser = argparse.ArgumentParser()

    # SVM hyperparameters.
    parser.add_argument("--kernel", type=str, default="rbf")
    parser.add_argument("--C", type=float, default=1.0)
    parser.add_argument("--gamma", type=parse_gamma, default="scale")

    # Input/output locations; the defaults are read from the SM_* environment
    # variables and are None when those variables are not set.
    parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR"))
    parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN"))
    parser.add_argument("--test", type=str, default=os.environ.get("SM_CHANNEL_TEST"))
    parser.add_argument("--train-file", type=str, default="train-V-1.csv")
    parser.add_argument("--test-file", type=str, default="test-V-1.csv")

    # Unknown arguments are tolerated rather than treated as errors.
    args, _ = parser.parse_known_args()

    # Fail early with a readable message instead of a TypeError from
    # os.path.join(None, ...) when a directory is neither passed explicitly
    # nor available through its environment variable.
    missing = [
        flag
        for flag, path in (
            ("--model-dir", args.model_dir),
            ("--train", args.train),
            ("--test", args.test),
        )
        if not path
    ]
    if missing:
        parser.error("missing required path argument(s): " + ", ".join(missing))

    print("SKLearn Version: ", sklearn.__version__)
    print("Joblib Version: ", joblib.__version__)

    print("[INFO] Reading data")
    print()
    train_df = pd.read_csv(os.path.join(args.train, args.train_file))
    test_df = pd.read_csv(os.path.join(args.test, args.test_file))

    # The last column of the training CSV is the label; all others are features.
    features = list(train_df.columns)
    label = features.pop(-1)

    print("Building training and testing datasets")
    print()

    # The test frame is indexed with the training column names, so it must
    # contain the same columns.
    X_train = train_df[features]
    X_test = test_df[features]

    y_train = train_df[label]
    y_test = test_df[label]

    print('Column order: ')
    print(features)
    print()

    print('Label column is:', label)
    print()

    print("Data Shape: ")
    print()
    print("----SHAPE OF TRAINING DATA (80%)")
    print(X_train.shape)
    print(y_train.shape)
    print()
    print("----SHAPE OF TESTING DATA (20%)")
    print(X_test.shape)
    print(y_test.shape)
    print()

    print("Training SVM Model....")
    print()
    model = SVC(kernel=args.kernel, C=args.C, gamma=args.gamma, verbose=True)
    model.fit(X_train, y_train)
    print()

    # Persist under the file name that model_fn loads.
    model_path = os.path.join(args.model_dir, "model.joblib")
    joblib.dump(model, model_path)
    print("Model persisted at "+ model_path)
    print()

    # Evaluate on the held-out test split.
    y_pred_test = model.predict(X_test)
    test_acc = accuracy_score(y_test, y_pred_test)
    test_rep = classification_report(y_test, y_pred_test)

    print()
    print("--- METRICS RESULTS FOR TESTING DATA ---")
    print()
    print("Total Rows are:", X_test.shape[0])
    print("[TESTING] Model accuracy is: ", test_acc)
    print("[TESTING] Testing Report: ")
    print(test_rep)
    

Writing script.py


In [101]:
from sagemaker.sklearn.estimator import SKLearn

# Version tag of the scikit-learn container, reused when the model is deployed.
FRAMEWORK_VERSION = "0.23-1"

# Values forwarded to script.py, which declares matching --kernel/--C/--gamma
# arguments.
svm_hyperparameters = dict(kernel="rbf", C=1.0, gamma="scale")

# NOTE(review): the job-name prefix says "RF" although script.py trains an
# SVC; kept unchanged so job names stay consistent with earlier runs.
sklearn_estimator = SKLearn(
    entry_point="script.py",
    framework_version=FRAMEWORK_VERSION,
    role="arn:aws:iam::940583586544:role/sagemaker_credit_risk",
    base_job_name="RF-custom-sklearn",
    instance_type="ml.m5.large",
    instance_count=1,
    hyperparameters=svm_hyperparameters,
    # Spot training: max_run / max_wait are given in seconds.
    use_spot_instances=True,
    max_run=3600,
    max_wait=7200,
)

In [102]:
# Map each channel name to the S3 location returned by the upload cell, then
# start the training job and block while streaming its logs.
input_channels = {"train": trainpath, "test": testpath}
sklearn_estimator.fit(input_channels, wait=True, logs=True)

2024-12-10 19:42:59 Starting - Starting the training job...
2024-12-10 19:43:14 Starting - Preparing the instances for training...
2024-12-10 19:43:48 Downloading - Downloading input data...
2024-12-10 19:44:14 Downloading - Downloading the training image...
2024-12-10 19:45:05 Training - Training image download completed. Training in progress.
2024-12-10 19:45:05 Uploading - Uploading generated training model[34m2024-12-10 19:44:58,893 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2024-12-10 19:44:58,896 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-12-10 19:44:58,934 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2024-12-10 19:44:59,078 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-12-10 19:44:59,089 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-

In [103]:
# Wait for the most recent training job without streaming its logs.
latest_job = sklearn_estimator.latest_training_job
latest_job.wait(logs="None")

# Look up where the job stored the packaged model in S3.
job_description = sm_boto3.describe_training_job(TrainingJobName=latest_job.name)
artifact = job_description["ModelArtifacts"]["S3ModelArtifacts"]

print("Model Artifact persisted at ", artifact)


2024-12-10 19:45:18 Starting - Preparing the instances for training
2024-12-10 19:45:18 Downloading - Downloading the training image
2024-12-10 19:45:18 Training - Training image download completed. Training in progress.
2024-12-10 19:45:18 Uploading - Uploading generated training model
2024-12-10 19:45:18 Completed - Training job completed
Model Artifact persisted at  s3://sagemaker-us-east-1-940583586544/RF-custom-sklearn-2024-12-10-19-42-49-819/output/model.tar.gz


In [104]:
# Show the model artifact S3 URI as this cell's output (the same value the
# previous cell prints).
artifact

's3://sagemaker-us-east-1-940583586544/RF-custom-sklearn-2024-12-10-19-42-49-819/output/model.tar.gz'

In [105]:
from sagemaker.sklearn.model import SKLearnModel
from time import gmtime, strftime

# Timestamped (UTC) name so each run of this cell produces a distinct model name.
model_stamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
model_name = "Credit-risk-model-" + model_stamp

# Wrap the trained artifact; script.py provides model_fn for loading it, and
# the container version matches the one used for training.
model = SKLearnModel(
    model_data=artifact,
    entry_point="script.py",
    framework_version=FRAMEWORK_VERSION,
    role="arn:aws:iam::940583586544:role/sagemaker_credit_risk",
    name=model_name,
)

In [106]:
# Display the SKLearnModel object's repr to confirm it was constructed.
model

<sagemaker.sklearn.model.SKLearnModel at 0x2529c65ce10>

In [107]:
# Endpoint name carries its own UTC timestamp, computed at deploy time.
deploy_stamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
endpoint_name = "Credit-risk-model-" + deploy_stamp
print("EndpointName={}".format(endpoint_name))

# Deploy the model to a single-instance endpoint and keep the returned predictor.
predictor = model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.m5.large",
    initial_instance_count=1,
)

EndpointName=Credit-risk-model-2024-12-10-19-46-04


------!