In [10]:
from sagemaker import image_uris, model_uris, script_uris


train_model_id, train_model_version, train_scope = "lightgbm-regression-model", "*", "training"

training_instance_type = "ml.m4.xlarge"

# Retrieve the docker image
train_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    model_id=train_model_id,
    model_version=train_model_version,
    image_scope=train_scope,
    instance_type=training_instance_type,
)
# Retrieve the training script
train_source_uri = script_uris.retrieve(
    model_id=train_model_id, model_version=train_model_version, script_scope=train_scope
)
# Retrieve the pre-trained model 
train_model_uri = model_uris.retrieve(
    model_id=train_model_id, model_version=train_model_version, model_scope=train_scope
)

In [11]:
# Sample training data is available in this bucket
bucket='ml-projects-1'
prefix = 'house_prices'
input_folder_train = 'clean_data_2/training'
data_location_train_path = 's3://{}/{}/{}'.format(bucket, prefix, input_folder_train)

#output path
output_folder = 'output_model'
output_location_model_path = 's3://{}/{}/{}'.format(bucket, prefix, output_folder)

In [34]:
from sagemaker.estimator import Estimator
import sagemaker
from sagemaker import hyperparameters

hyperparameters = hyperparameters.retrieve_default(
    model_id=train_model_id, model_version=train_model_version
)

hyperparameters[
    "num_boost_round"]= "550"
hyperparameters['metric'] = "rmse"
hyperparameters['max_depth'] = "13"
hyperparameters["num_leaves"]: "55"

    
print(hyperparameters)

# Create SageMaker Estimator instance
lgbm_estimator = Estimator(
    role=sagemaker.get_execution_role(),
    image_uri=train_image_uri,
    source_dir=train_source_uri,
    model_uri=train_model_uri,
    entry_point="transfer_learning.py",
    instance_count=1,
    instance_type=training_instance_type,
    max_run=360000,
    hyperparameters=hyperparameters,
    output_path=output_location_model_path,
)

{'num_boost_round': '300', 'early_stopping_rounds': '30', 'metric': 'rmse', 'learning_rate': '0.009', 'num_leaves': '67', 'feature_fraction': '0.74', 'bagging_fraction': '0.53', 'bagging_freq': '5', 'max_depth': '15', 'min_data_in_leaf': '26', 'max_delta_step': '0.0', 'lambda_l1': '0.0', 'lambda_l2': '0.0', 'boosting': 'gbdt', 'min_gain_to_split': '0.0', 'tree_learner': 'serial', 'feature_fraction_bynode': '1.0', 'is_unbalance': 'False', 'max_bin': '255', 'tweedie_variance_power': '1.5', 'num_threads': '0', 'verbosity': '1', 'use_dask': 'False'}


In [35]:
#Training phase
training_job_name = 'Training-house-prices-test-23'
lgbm_estimator.fit(
        {"training": data_location_train_path}, logs=True, job_name=training_job_name
    )

INFO:sagemaker:Creating training-job with name: Training-house-prices-test-23


2023-03-10 05:46:51 Starting - Starting the training job......
2023-03-10 05:47:24 Starting - Preparing the instances for training...
2023-03-10 05:48:16 Downloading - Downloading input data...
2023-03-10 05:48:41 Training - Downloading the training image......
2023-03-10 05:49:47 Uploading - Uploading generated training model[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2023-03-10 05:49:34,178 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2023-03-10 05:49:34,180 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-03-10 05:49:34,191 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2023-03-10 05:49:34,193 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2023-03-10 05:49:34,481 sagemaker-training-toolkit INFO     Insta

In [29]:
inference_instance_type = "ml.m4.xlarge"

# Retrieve the inference docker container uri
deploy_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    image_scope="inference",
    model_id=train_model_id,
    model_version=train_model_version,
    instance_type=inference_instance_type,
)
# Retrieve the inference script uri
deploy_source_uri = script_uris.retrieve(
    model_id=train_model_id, model_version=train_model_version, script_scope="inference"
)

# Use the estimator from the previous step to deploy to a SageMaker endpoint
predictor_2 = lgbm_estimator.deploy(
    initial_instance_count=1,
    instance_type=inference_instance_type,
    entry_point="inference.py",
    image_uri=deploy_image_uri,
    source_dir=deploy_source_uri)

INFO:sagemaker:Creating model with name: sagemaker-jumpstart-2023-03-10-04-36-16-542
INFO:sagemaker:Creating endpoint-config with name sagemaker-jumpstart-2023-03-10-04-36-16-542
INFO:sagemaker:Creating endpoint with name sagemaker-jumpstart-2023-03-10-04-36-16-542


-------!

In [30]:
predictor_2.endpoint_name

'sagemaker-jumpstart-2023-03-10-04-36-16-542'

In [31]:
import re
import boto3
import json
import numpy as np

# Sample test data is available in this bucket
bucket='ml-projects-1'
key = 'house_prices/clean_data_2/test/Test_House_Prices.csv'

# Resquest step for predict house prices
S3 = boto3.client('s3')
obj = S3.get_object(Bucket=bucket, Key=key)
data_string = obj['Body'].read().decode('utf-8')
ENDPOINT_NAME = predictor_2.endpoint_name
runtime= boto3.client('runtime.sagemaker')
response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME,
                                      ContentType='text/csv',
                                       Body=data_string)
dict_predictions = {}
response_body = json.loads(response["Body"].read())
dict_predictions["prediction"] = response_body["prediction"]

#Save prediction dictionary in json file
with open('predictions-House-Prices.json', 'w', encoding ='utf8') as json_file:
    json.dump(dict_predictions, json_file)