In [None]:
import joblib
import warnings
import sagemaker
import pandas as pd
from datetime import datetime
from aws.sagemaker import create_model
from sagemaker.inputs import TrainingInput
from utilities.evaluation import RegressionEvaluator
from utilities.utils import download_artifact, decompress_artifact, prediction

# Install a blanket "ignore" filter so no warning is printed in later cell output.
# NOTE(review): this also hides deprecation notices from the imported libraries.
warnings.simplefilter("ignore")

In [None]:
# Show the installed SDK version so a run can be matched to its environment.
print(f"Sagemaker: {sagemaker.__version__}")

# One session object is created and reused to look up the execution role.
session = sagemaker.Session()
role = sagemaker.get_execution_role(
    sagemaker_session=session,
)

In [None]:
# Bucket name and the key prefix that every path below is built from.
bucket = "housing-boston"
prefix = "boston-data"

# Output location, later passed to create_model as output_path.
output_path = f"s3://{bucket}/{prefix}/output"

# Input CSV objects, one per data split.
train_path = f"s3://{bucket}/{prefix}/train/train.csv"
validation_path = f"s3://{bucket}/{prefix}/validation/validation.csv"

In [None]:
# Hyperparameters handed to create_model via its `params` argument.
# "reg:squarederror" is the current name of the squared-error regression objective;
# the previously used "reg:linear" is a deprecated alias for it in XGBoost.
params = {
    "objective": "reg:squarederror",
    "num_round": 200,
    "max_depth": 5,
    "subsample": 0.7,
}

# Build the estimator through the project helper, one keyword per line.
# NOTE(review): "ml.t2.medium" does not appear among the instance types the
# SageMaker CreateTrainingJob API lists for training; confirm how create_model
# uses instance_type before running.
estimator = create_model(
    image="xgboost",
    version="1.2-2",
    role=role,
    instance_type="ml.t2.medium",
    params=params,
    output_path=output_path,
    input_mode="File",
)

In [None]:
# SageMaker requires training job names to be unique, so the timestamp goes down
# to the second; at minute resolution a re-run within the same minute would
# produce the same name as the previous job.
job_name = f"xgboost-boston-job-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}"

# One TrainingInput per split; both point at the CSV paths and use the same content type.
train_data_channel = TrainingInput(
    s3_data=train_path,
    content_type="text/csv",
)
validation_data_channel = TrainingInput(
    s3_data=validation_path,
    content_type="text/csv",
)

In [None]:
# Fit the estimator under the explicit job name, with one input per named channel.
estimator.fit(
    inputs={
        "train": train_data_channel,
        "validation": validation_data_channel,
    },
    job_name=job_name,
)

In [None]:
# Full path to this job's model archive under the configured output prefix
# (despite the name, model_dir holds the path of the .tar.gz itself).
model_dir = f"{output_path}/{job_name}/output/model.tar.gz"

# Pass the archive location to the project helper, targeting a per-job path under models/.
download_artifact(
    s3_path=model_dir,
    localpath=f"models/{job_name}/model.tar.gz",
)

In [None]:
# Hand the per-job local archive path to the project's decompress helper.
decompress_artifact(
    localpath=f"models/{job_name}/model.tar.gz",
)