In [2]:
# Use the %pip magic (not !pip) so the packages are installed into the environment of the running kernel.
%pip install --upgrade --quiet sagemaker ipywidgets

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
import json
import warnings

import boto3
import sagemaker
from sagemaker.session import Session

# A single SageMaker session is created and shared. `sess` is kept as an alias
# of `sagemaker_session` because later cells refer to the session by that name.
sagemaker_session = Session()
sess = sagemaker_session

# IAM role ARN of the caller and the region of the default boto3 session.
aws_role = sagemaker_session.get_caller_identity_arn()
aws_region = boto3.Session().region_name

# NOTE: this silences every Python warning for the rest of the notebook.
warnings.filterwarnings("ignore")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [4]:
from sagemaker import image_uris, model_uris, script_uris, hyperparameters
from sagemaker.estimator import Estimator

# JumpStart model to fine-tune. "*" is a wildcard version identifier.
# NOTE(review): the SDK log printed on retrieval suggests pinning to '4.0.2'
# for more stable results - consider doing so for reproducibility.
model_id = "tensorflow-ic-imagenet-mobilenet-v2-100-224-classification-4"
model_version = "*"

# Instance type used for the training jobs launched below.
training_instance_type = "ml.g4dn.xlarge"

In [5]:
# Docker image used to run the training job on the chosen instance type
train_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    image_scope="training",
    model_id=model_id,
    model_version=model_version,
    instance_type=training_instance_type,
)

# Training script bundle for the selected model
train_source_uri = script_uris.retrieve(
    script_scope="training",
    model_id=model_id,
    model_version=model_version,
)

# Pretrained model tarball that transfer learning starts from
train_model_uri = model_uris.retrieve(
    model_scope="training",
    model_id=model_id,
    model_version=model_version,
)

Using model 'tensorflow-ic-imagenet-mobilenet-v2-100-224-classification-4' with wildcard version identifier '*'. You can pin to version '4.0.2' for more stable results. Note that models may have different input/output signatures after a major version upgrade.


In [6]:
# S3 location of the training data
training_data_bucket = "isicbucket"
training_data_prefix = "preprocessed/train_preprocess_ps_224_new"
training_dataset_s3_path = "s3://{}/{}".format(training_data_bucket, training_data_prefix)

training_job_name = "mobilenet-v2-100-224"

# S3 location (in the session's default bucket) where the model and its
# artefacts are written
output_prefix = "mobilenet-v2-100-224"
output_bucket = sess.default_bucket()
s3_output_location = "s3://{}/{}/ps_output_v3".format(output_bucket, output_prefix)

In [7]:
# Import the module under an alias: the name `hyperparameters` is bound to the
# resulting dict below (later cells pass it to the estimators), which would
# otherwise shadow the module and make this cell fail when it is re-run.
from sagemaker import hyperparameters as jumpstart_hyperparameters

# Retrieve the default hyper-parameters for fine-tuning the model
hyperparameters = jumpstart_hyperparameters.retrieve_default(
    model_id=model_id, model_version=model_version
)

# [Optional] Override default hyperparameters with custom values.
# Values are given as strings, matching the format of the retrieved defaults
# (e.g. 'train_only_top_layer': 'True').
hyperparameters["epochs"] = "10"
hyperparameters["optimizer"] = "sgd"
hyperparameters["augmentation"] = "True"
hyperparameters["early_stopping"] = "True"

# One "key: value" pair per line for readability
print(str(hyperparameters).replace(", ", "\n"))

{'train_only_top_layer': 'True'
'epochs': '10'
'batch_size': '32'
'optimizer': 'sgd'
'learning_rate': '0.001'
'beta_1': '0.9'
'beta_2': '0.999'
'momentum': '0.9'
'epsilon': '1e-07'
'rho': '0.95'
'initial_accumulator_value': '0.1'
'reinitialize_top_layer': 'Auto'
'early_stopping': True
'early_stopping_patience': '5'
'early_stopping_min_delta': '0.0'
'dropout_rate': '0.2'
'regularizers_l2': '0.0001'
'label_smoothing': '0.1'
'image_resize_interpolation': 'bilinear'
'augmentation': True
'augmentation_random_flip': 'horizontal_and_vertical'
'augmentation_random_rotation': '0.2'
'augmentation_random_zoom': '0.1'
'binary_mode': 'False'
'eval_metric': 'accuracy'
'validation_split_ratio': '0.2'
'random_seed': '123'}


In [8]:
from sagemaker.tuner import ContinuousParameter

# Use AMT (Automatic Model Tuning) for tuning and selecting the best model
use_amt = False

# Define objective metric per framework, based on which the best model will be selected.
amt_metric_definitions = {
    "metrics": [{"Name": "val_accuracy", "Regex": "val_accuracy: ([0-9\\.]+)"}],
    "type": "Maximize",
}

# You can select from the hyperparameters supported by the model, and configure ranges of values
# to be searched for training the optimal model.
# (https://docs.aws.amazon.com/sagemaker/latest/dg/automatic-model-tuning-define-ranges.html)
# The key must be a hyperparameter the model actually exposes: the defaults retrieved for this
# model list `learning_rate`; there is no `adam-learning-rate` entry.
hyperparameter_ranges = {
    "learning_rate": ContinuousParameter(0.0001, 0.1, scaling_type="Logarithmic")
}

# Increase the total number of training jobs run by AMT, for increased accuracy (and training time).
max_jobs = 6
# Change parallel training jobs run by AMT to reduce total training time, constrained by your account limits.
# if max_jobs=max_parallel_jobs then Bayesian search turns to Random.
max_parallel_jobs = 2

In [10]:
from sagemaker.estimator import Estimator
from sagemaker.utils import name_from_base
from sagemaker.tuner import HyperparameterTuner

training_job_name = name_from_base(f"jumpstart-example-{model_id}-transfer-learning")

# Metrics extracted from the training logs: each entry pairs a metric name
# with the regex whose capture group holds its value.
training_metric_definitions = [
    {"Name": metric_name, "Regex": metric_regex}
    for metric_name, metric_regex in (
        ("val_accuracy", "val_accuracy: ([0-9\\.]+)"),
        ("val_loss", "val_loss: ([0-9\\.]+)"),
        ("train_accuracy", "- accuracy: ([0-9\\.]+)"),
        ("train_loss", "- loss: ([0-9\\.]+)"),
    )
]

# SageMaker Estimator that fine-tunes the pretrained model
ic_estimator = Estimator(
    # what to run
    image_uri=train_image_uri,
    source_dir=train_source_uri,
    entry_point="transfer_learning.py",
    model_uri=train_model_uri,
    hyperparameters=hyperparameters,
    # where and how to run it
    role=aws_role,
    instance_type=training_instance_type,
    instance_count=1,
    max_run=360000,
    # naming, outputs and metrics
    base_job_name=training_job_name,
    output_path=s3_output_location,
    metric_definitions=training_metric_definitions,
)

training_inputs = {"training": training_dataset_s3_path}

if not use_amt:
    # Launch a single SageMaker training job on the S3 training data
    ic_estimator.fit(training_inputs, logs=True)
else:
    # Launch a SageMaker tuning job that searches for the best hyperparameters;
    # the first AMT metric definition is the objective.
    hp_tuner = HyperparameterTuner(
        estimator=ic_estimator,
        objective_metric_name=amt_metric_definitions["metrics"][0]["Name"],
        hyperparameter_ranges=hyperparameter_ranges,
        metric_definitions=amt_metric_definitions["metrics"],
        objective_type=amt_metric_definitions["type"],
        max_jobs=max_jobs,
        max_parallel_jobs=max_parallel_jobs,
        base_tuning_job_name=training_job_name,
    )
    hp_tuner.fit(training_inputs)

INFO:sagemaker:Creating training-job with name: jumpstart-example-tensorflow-ic-imagene-2024-03-19-17-41-20-413


2024-03-19 17:41:20 Starting - Starting the training job...
2024-03-19 17:41:35 Starting - Preparing the instances for training...
2024-03-19 17:42:12 Downloading - Downloading input data......
2024-03-19 17:43:17 Downloading - Downloading the training image..................
2024-03-19 17:46:02 Training - Training image download completed. Training in progress..[34m2024-03-19 17:46:14.179993: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.[0m
[34m2024-03-19 17:46:14.180544: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.[0m
[34m2024-03-19 17:46:14.180767: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `

In [11]:
# Name of the training job to inspect: the tuner's best job when AMT was used,
# otherwise the job launched directly by the estimator.
training_job_name = (
    hp_tuner.best_training_job() if use_amt else ic_estimator.latest_training_job.job_name
)

# Print out where the model is saved


In [12]:
import boto3

sm_boto3 = boto3.client("sagemaker")

# Resolve the job name for both modes. When AMT ran the jobs, only the tuner
# was fitted, so the estimator's `latest_training_job` must not be dereferenced.
if use_amt:
    training_job_name = hp_tuner.best_training_job()
else:
    training_job_name = ic_estimator.latest_training_job.job_name
print(f'Training name: {training_job_name}\n')

if not use_amt:
    # Block until the job has finished, without streaming its logs
    ic_estimator.latest_training_job.wait(logs="None")

# Look up the S3 location of the model artifact produced by that job
artifact = sm_boto3.describe_training_job(
    TrainingJobName=training_job_name
)["ModelArtifacts"]["S3ModelArtifacts"]

print("\nModel artifact persisted at " + artifact)

Training name: jumpstart-example-tensorflow-ic-imagene-2024-03-19-17-41-20-413


2024-03-19 18:18:05 Starting - Preparing the instances for training
2024-03-19 18:18:05 Downloading - Downloading the training image
2024-03-19 18:18:05 Training - Training image download completed. Training in progress.
2024-03-19 18:18:05 Uploading - Uploading generated training model
2024-03-19 18:18:05 Completed - Training job completed

Model artifact persisted at s3://sagemaker-us-west-2-766088526747/mobilenet-v2-100-224/ps_output_v3/jumpstart-example-tensorflow-ic-imagene-2024-03-19-17-41-20-413/output/model.tar.gz


In [13]:
from sagemaker import TrainingJobAnalytics

# Collect the metrics recorded for the training job and show them as a table
training_analytics = TrainingJobAnalytics(training_job_name=training_job_name)
df = training_analytics.dataframe()

df

Unnamed: 0,timestamp,metric_name,value
0,0.0,val_accuracy,0.6912
1,180.0,val_accuracy,0.6927
2,360.0,val_accuracy,0.7156
3,540.0,val_accuracy,0.678
4,720.0,val_accuracy,0.7239
5,900.0,val_accuracy,0.7106
6,1080.0,val_accuracy,0.7106
7,1260.0,val_accuracy,0.7239
8,1440.0,val_accuracy,0.6801
9,1620.0,val_accuracy,0.723896


# Deploy the model for inference

In [14]:
inference_instance_type = "ml.c5.xlarge"

# Retrieve the inference docker container uri
deploy_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    image_scope="inference",
    model_id=model_id,
    model_version=model_version,
    instance_type=inference_instance_type,
)
# Retrieve the inference script uri
deploy_source_uri = script_uris.retrieve(
    model_id=model_id, model_version=model_version, script_scope="inference"
)

endpoint_name = name_from_base(f"jumpstart-example-FT-{model_id}-")

# Deploy from whichever object actually ran the training: when AMT is enabled
# only the tuner is fitted (the estimator itself has no trained model), so the
# deployment must go through the tuner in that case.
trained_model_source = hp_tuner if use_amt else ic_estimator
finetuned_predictor = trained_model_source.deploy(
    initial_instance_count=1,
    instance_type=inference_instance_type,
    entry_point="inference.py",
    image_uri=deploy_image_uri,
    source_dir=deploy_source_uri,
    endpoint_name=endpoint_name,
)

INFO:sagemaker:Repacking model artifact (s3://sagemaker-us-west-2-766088526747/mobilenet-v2-100-224/ps_output_v3/jumpstart-example-tensorflow-ic-imagene-2024-03-19-17-41-20-413/output/model.tar.gz), script artifact (s3://jumpstart-cache-prod-us-west-2/source-directory-tarballs/tensorflow/inference/ic/v2.0.3/sourcedir.tar.gz), and dependencies ([]) into single tar.gz file located at s3://sagemaker-us-west-2-766088526747/sagemaker-jumpstart-2024-03-19-18-39-26-681/model.tar.gz. This may take some time depending on model size...
INFO:sagemaker:Creating model with name: sagemaker-jumpstart-2024-03-19-18-39-26-681
INFO:sagemaker:Creating endpoint-config with name jumpstart-example-FT-tensorflow-ic-imag-2024-03-19-18-39-26-681
INFO:sagemaker:Creating endpoint with name jumpstart-example-FT-tensorflow-ic-imag-2024-03-19-18-39-26-681


----!

In [15]:
s3_bucket = "isicbucket"
key_prefix = "preprocessed/test_preprocess_ps_224_new"


def download_from_s3(images, bucket=None, prefix=None):
    """Download a set of S3 objects to local files.

    Args:
        images: Mapping of local filename -> object key relative to ``prefix``.
        bucket: Source bucket name; defaults to the module-level ``s3_bucket``.
        prefix: Key prefix prepended to every object key; defaults to the
            module-level ``key_prefix``.
    """
    bucket = s3_bucket if bucket is None else bucket
    prefix = key_prefix if prefix is None else prefix
    # Create the client once instead of once per downloaded file
    s3_client = boto3.client("s3")
    for filename, image_key in images.items():
        s3_client.download_file(bucket, f"{prefix}/{image_key}", filename)


# Local filename -> "<class folder>/<image file>" under the test prefix
skin_images = {
    "img1.jpg": "actinic keratoses/ISIC_0026984.jpg",
    "img2.jpg": "basal cell carcinoma/ISIC_0025301.jpg",
    "img3.jpg": "dermatofibroma/ISIC_0024973.jpg",
    "img4.jpg": "keratosis/ISIC_0012757_downsampled.jpg",
    "img5.jpg": "melanoma/ISIC_0000167.jpg",
    "img6.jpg": "nevus/ISIC_0000012.jpg",
    "img7.jpg": "unknown/ISIC_0151200.jpg",
    "img8.jpg": "vascular skin/ISIC_0026092.jpg",
}
download_from_s3(skin_images)

In [16]:
from IPython.core.display import HTML

# Request arguments sent with every prediction: raw image bytes in, verbose JSON out
request_args = {"ContentType": "application/x-image", "Accept": "application/json;verbose"}

# Query the endpoint with each downloaded image and show it next to its predicted label
for image_filename in skin_images:
    with open(image_filename, "rb") as image_file:
        img = image_file.read()

    query_response = finetuned_predictor.predict(img, request_args)
    model_predictions = json.loads(query_response)
    predicted_label = model_predictions["predicted_label"]

    image_html = (
        f'<img src={image_filename} alt={image_filename} align="left" style="width: 250px;"/>'
        f"<figcaption>Predicted Label: {predicted_label}</figcaption>"
    )
    display(HTML(image_html))

In [17]:
# Tear down what the deploy step created: first the SageMaker model, then the
# endpoint behind `finetuned_predictor`.
finetuned_predictor.delete_model()
finetuned_predictor.delete_endpoint()

INFO:sagemaker:Deleting model with name: sagemaker-jumpstart-2024-03-19-18-39-26-681
INFO:sagemaker:Deleting endpoint configuration with name: jumpstart-example-FT-tensorflow-ic-imag-2024-03-19-18-39-26-681
INFO:sagemaker:Deleting endpoint with name: jumpstart-example-FT-tensorflow-ic-imag-2024-03-19-18-39-26-681


# Incrementally train the fine-tuned model

Incremental training allows you to train a new model using an expanded dataset that contains an underlying pattern that was not accounted for in the previous training and which resulted in poor model performance. You can use the artifacts from an existing model and use an expanded dataset to train a new model. Incremental training saves both time and resources as you don’t need to retrain a model from scratch.

One may use any dataset (old or new) as long as the dataset format remains the same (i.e., the same set of classes). The incremental training step is similar to the fine-tuning step discussed above, with one difference: fine-tuning starts from a pre-trained model, whereas incremental training starts from an existing fine-tuned model.

In [18]:
# Work out where the previously trained model artifact lives: it sits under the
# training output location, in a folder named after the training job.

if not use_amt:
    last_training_job_name = ic_estimator._current_job_name
else:
    # With AMT, take the model produced by the best training job of the tuning run.
    sage_client = boto3.Session().client("sagemaker")
    tuning_job_result = sage_client.describe_hyper_parameter_tuning_job(
        HyperParameterTuningJobName=hp_tuner._current_job_name
    )
    last_training_job_name = tuning_job_result["BestTrainingJob"]["TrainingJobName"]

last_trained_model_path = "/".join(
    [s3_output_location, last_training_job_name, "output", "model.tar.gz"]
)

In [19]:
# Output location and job name for the incremental training run
incremental_train_output_prefix = "jumpstart-example-ic-incremental-training"
incremental_s3_output_location = "s3://{}/{}/output".format(
    output_bucket, incremental_train_output_prefix
)
incremental_training_job_name = name_from_base(f"jumpstart-example-{model_id}-incremental-training")

# Same training setup as the fine-tuning estimator, except that training starts
# from the previously fine-tuned model artifact and writes to its own location.
incremental_train_estimator = Estimator(
    # what to run
    image_uri=train_image_uri,
    source_dir=train_source_uri,
    entry_point="transfer_learning.py",
    model_uri=last_trained_model_path,
    hyperparameters=hyperparameters,
    # where and how to run it
    role=aws_role,
    instance_type=training_instance_type,
    instance_count=1,
    max_run=360000,
    # naming, outputs and metrics
    base_job_name=incremental_training_job_name,
    output_path=incremental_s3_output_location,
    metric_definitions=training_metric_definitions,
)

incremental_training_inputs = {"training": training_dataset_s3_path}
incremental_train_estimator.fit(incremental_training_inputs, logs=True)

INFO:sagemaker:Creating training-job with name: jumpstart-example-tensorflow-ic-imagene-2024-03-19-18-42-04-359


2024-03-19 18:42:04 Starting - Starting the training job...
2024-03-19 18:42:19 Starting - Preparing the instances for training...
2024-03-19 18:42:57 Downloading - Downloading input data.........
2024-03-19 18:44:07 Downloading - Downloading the training image...............
2024-03-19 18:46:43 Training - Training image download completed. Training in progress.[34m2024-03-19 18:46:53.267811: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.[0m
[34m2024-03-19 18:46:53.268343: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.[0m
[34m2024-03-19 18:46:53.268577: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `T

In [20]:
# Replace the base name that was passed to the estimator with the name of the
# training job the estimator actually launched.
incremental_training_job_name = incremental_train_estimator.latest_training_job.job_name

In [21]:
# Collect the metrics recorded for the incremental training job and show them as a table
incremental_analytics = TrainingJobAnalytics(training_job_name=incremental_training_job_name)
incremental_df = incremental_analytics.dataframe()

incremental_df

Unnamed: 0,timestamp,metric_name,value
0,0.0,val_accuracy,0.7135
1,180.0,val_accuracy,0.7058
2,360.0,val_accuracy,0.7215
3,540.0,val_accuracy,0.6811
4,780.0,val_accuracy,0.7255
5,960.0,val_accuracy,0.7168
6,1140.0,val_accuracy,0.7002
7,1320.0,val_accuracy,0.7203
8,1500.0,val_accuracy,0.6904
9,1680.0,val_accuracy,0.725532


In [23]:
inference_instance_type = "ml.c5.xlarge"

# Inference docker container uri for the chosen model and instance type
deploy_image_uri = image_uris.retrieve(
    model_id=model_id,
    model_version=model_version,
    image_scope="inference",
    instance_type=inference_instance_type,
    region=None,
    framework=None,
)

# Inference script uri
deploy_source_uri = script_uris.retrieve(
    script_scope="inference",
    model_id=model_id,
    model_version=model_version,
)

endpoint_name = name_from_base(f"jumpstart-example-FT-{model_id}-")

# Host the incrementally trained model on a SageMaker endpoint
finetuned_predictor = incremental_train_estimator.deploy(
    endpoint_name=endpoint_name,
    instance_type=inference_instance_type,
    initial_instance_count=1,
    image_uri=deploy_image_uri,
    source_dir=deploy_source_uri,
    entry_point="inference.py",
)

INFO:sagemaker:Repacking model artifact (s3://sagemaker-us-west-2-766088526747/jumpstart-example-ic-incremental-training/output/jumpstart-example-tensorflow-ic-imagene-2024-03-19-18-42-04-359/output/model.tar.gz), script artifact (s3://jumpstart-cache-prod-us-west-2/source-directory-tarballs/tensorflow/inference/ic/v2.0.3/sourcedir.tar.gz), and dependencies ([]) into single tar.gz file located at s3://sagemaker-us-west-2-766088526747/sagemaker-jumpstart-2024-03-19-19-19-25-273/model.tar.gz. This may take some time depending on model size...
INFO:sagemaker:Creating model with name: sagemaker-jumpstart-2024-03-19-19-19-25-273
INFO:sagemaker:Creating endpoint-config with name jumpstart-example-FT-tensorflow-ic-imag-2024-03-19-19-19-25-273
INFO:sagemaker:Creating endpoint with name jumpstart-example-FT-tensorflow-ic-imag-2024-03-19-19-19-25-273


----!

In [24]:
from IPython.core.display import HTML

# Request arguments sent with every prediction: raw image bytes in, verbose JSON out
request_args = {"ContentType": "application/x-image", "Accept": "application/json;verbose"}

# Query the new endpoint with each test image and show it next to its predicted label
for image_filename in skin_images:
    with open(image_filename, "rb") as image_file:
        img = image_file.read()

    query_response = finetuned_predictor.predict(img, request_args)
    model_predictions = json.loads(query_response)
    predicted_label = model_predictions["predicted_label"]

    image_html = (
        f'<img src={image_filename} alt={image_filename} align="left" style="width: 250px;"/>'
        f"<figcaption>Predicted Label: {predicted_label}</figcaption>"
    )
    display(HTML(image_html))

In [25]:
# Tear down what the second deploy step created: first the SageMaker model,
# then the endpoint behind `finetuned_predictor`.
finetuned_predictor.delete_model()
finetuned_predictor.delete_endpoint()

INFO:sagemaker:Deleting model with name: sagemaker-jumpstart-2024-03-19-19-19-25-273
INFO:sagemaker:Deleting endpoint configuration with name: jumpstart-example-FT-tensorflow-ic-imag-2024-03-19-19-19-25-273
INFO:sagemaker:Deleting endpoint with name: jumpstart-example-FT-tensorflow-ic-imag-2024-03-19-19-19-25-273
