In [1]:
import boto3
import sagemaker
from sagemaker.sklearn.estimator import SKLearn

from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import CSVDeserializer


# Create a boto3 session in us-east-2
boto_session = boto3.Session(region_name="ca-central-1")
sagemaker_session = sagemaker.Session(boto_session=boto_session)

role = "arn:aws:iam::222634404112:role/SageMakerExecutionRole-ca"
job_name = "occupancy-model-t1"



sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/manraj/Library/Application Support/sagemaker/config.yaml


In [2]:
sklearn_estimator = SKLearn(
    entry_point="train.py",       # Use your training script
    source_dir=".",               # Package the current directory (which includes train.py, inference.py, requirements.txt, etc.)
    dependencies=["requirements.txt"],
    role=role,
    instance_type="ml.m5.xlarge",
    framework_version="0.23-1",
    output_path="s3://dana-minicapstone-ca/model-artifacts/",
    sagemaker_session=sagemaker_session,
    job_name=job_name
)

sklearn_estimator.fit()

2025-04-16 03:54:11 Starting - Starting the training job...
2025-04-16 03:54:40 Starting - Preparing the instances for training...
2025-04-16 03:55:12 Downloading - Downloading the training image...
2025-04-16 03:55:47 Training - Training image download completed. Training in progress...2025-04-16 03:55:57,020 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
2025-04-16 03:55:57,023 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-04-16 03:55:57,064 sagemaker_sklearn_container.training INFO     Invoking user training script.
2025-04-16 03:55:57,269 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:
/miniconda3/bin/python -m pip install -r requirements.txt
Collecting xgboost
  Downloading xgboost-1.6.2-py3-none-manylinux2014_x86_64.whl (255.9 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 255.9/255.9 MB 6.5 MB/s eta 0:00:00
Collecting matplotlib
  Downloading matplotlib-3.5.3-cp3

In [3]:
from sagemaker.sklearn.model import SKLearnModel
from sagemaker.serializers import CSVSerializer

model = SKLearnModel(
    model_data=sklearn_estimator.model_data,
    role=role,
    entry_point="inference.py",  # Reference your inference script
    framework_version="0.23-1",
    py_version="py3",
    source_dir="."
)

predictor = model.deploy(initial_instance_count=1, instance_type="ml.m5.large")

------!

In [4]:
predictor.serializer = CSVSerializer()
predictor.deserializer = CSVDeserializer()

s3 = boto3.client('s3', region_name='ca-central-1')
bucket_name = "dana-minicapstone-ca"
test_key = "data/hvac_test.csv"
response = s3.get_object(Bucket=bucket_name, Key=test_key)
test_csv = response['Body'].read().decode('utf-8')

result = predictor.predict(test_csv)
print("Predictions from endpoint:\n", result)

Predictions from endpoint:
 [['Predicted_HVAC_kWh'], ['-0.09174354187602107'], ['3.9120071056391352'], ['3.137175181411598'], ['2.966215058042277'], ['-0.11315997674233036'], ['3.0042134049680187'], ['2.8470372272090687'], ['2.8244327539119487'], ['-0.14287483855908123'], ['2.9517269434932683'], ['2.8550756104811086'], ['2.7893357040904867'], ['-0.07010931953152011'], ['2.918200936427664'], ['2.734903422250066'], ['2.7488925130702073'], ['-0.11584084365026959'], ['2.86484113154028'], ['2.733579205700158'], ['2.90497906716871'], ['0.1801117371240275'], ['3.028146998662656'], ['2.9271651197857143'], ['2.918699497977115'], ['0.16863090712157103'], ['2.9433605470230026'], ['2.819047500842749'], ['2.7659206877762874'], ['0.07863626930630563'], ['2.7595400548479603'], ['2.3827292102700337'], ['2.5123513947868865'], ['0.03604726215447751'], ['2.5136448240826073'], ['2.3049221248619975'], ['2.310756800069243'], ['-0.15524340755057886'], ['3.3684540147141933'], ['2.917948149294052'], ['2.856590