In [4]:
%%writefile train.py

## use this link for reference
## https://sagemaker.readthedocs.io/en/stable/frameworks/sklearn/using_sklearn.html

import argparse
import os
import pandas as pd
import joblib
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Define model_fn to load the model for deployment
def model_fn(model_dir):
    """Load model from the model directory."""
    model_path = os.path.join(model_dir, "model.joblib")
    return joblib.load(model_path)
    
def main():
    # Parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR"))
    parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN"))
    parser.add_argument("--fit-intercept", type=bool, default=True)
    parser.add_argument("--normalize", type=bool, default=False)
    args = parser.parse_args()

    # Load training data
    train_data = pd.read_csv(os.path.join(args.train, "rental_pricing_dataset.csv"))
    
    # Separate features and target
    X = train_data.drop("Rent", axis=1)
    y = train_data["Rent"]

    # Split into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the model with specified hyperparameters
    model = LinearRegression(fit_intercept=args.fit_intercept, normalize=args.normalize)
    model.fit(X_train, y_train)

    # Evaluate on validation set
    y_pred = model.predict(X_val)
    mse = mean_squared_error(y_val, y_pred)
    print(f"Validation Mean Squared Error: {mse}")

    # Save the model
    model_path = os.path.join(args.model_dir, "model.joblib")
    joblib.dump(model, model_path)

if __name__ == "__main__":
    main()


Overwriting train.py


In [None]:
# for local training
# first give access to Docker in domain by running this in shell
# aws --region us-east-1     sagemaker update-domain --domain-id d-73cncbueggsv     --domain-settings-for-update '{"DockerSettings": {"EnableDockerAccess": "ENABLED"}}'

In [None]:
# now install docker in local instance
!/bin/bash "AmazonSageMakerCourse/CustomAlgorithm/docker_install.sh"

In [5]:
import sagemaker
from sagemaker.sklearn.estimator import SKLearn
from sagemaker import get_execution_role

# Define S3 paths
bucket = "apartment-pricing"
prefix = "TrainData"
train_input = f"s3://{bucket}/{prefix}/"
output_path=f"s3://apartment-pricing/model/linear_regression/"

# Get SageMaker execution role
role = get_execution_role()

# Create SKLearn Estimator
sklearn_estimator = SKLearn(
    entry_point="train.py",
    framework_version="1.0-1",  # Change based on available versions
    # instance_type="ml.m5.large",
    instance_type="local",
    instance_count=1,
    role=role,
    sagemaker_session=sagemaker.LocalSession(),
    # sagemaker_session=sagemaker.Session(),
    output_path=output_path,
    hyperparameters={
        "fit-intercept": True,
        "normalize": False,
    },
)

# Launch training job
sklearn_estimator.fit({"train": train_input})


INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2024-12-02-06-30-30-682
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.local.image:'Docker Compose' found using Docker CLI.
INFO:sagemaker.local.local_session:Starting training job
INFO:sagemaker.local.image:docker compose file: 
services:
  sagemaker-local:
    command: train
    container_name: tizv2teaxh-sagemaker-local
    environment:
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    image: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit

Login Succeeded


INFO:sagemaker.local.image:image pulled: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:1.0-1-cpu-py3
INFO:sagemaker.local.image:docker command: docker compose -f /home/sagemaker-user/tmp/tmpgvmr38ng/docker-compose.yaml up --build --abort-on-container-exit


 Container tizv2teaxh-sagemaker-local  Creating
 Container tizv2teaxh-sagemaker-local  Created
Attaching to tizv2teaxh-sagemaker-local
tizv2teaxh-sagemaker-local  | 2024-12-02 06:32:24,923 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
tizv2teaxh-sagemaker-local  | 2024-12-02 06:32:24,928 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
tizv2teaxh-sagemaker-local  | 2024-12-02 06:32:24,931 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)
tizv2teaxh-sagemaker-local  | 2024-12-02 06:32:24,949 sagemaker-training-toolkit INFO     instance_groups entry not present in resource_config
tizv2teaxh-sagemaker-local  | 2024-12-02 06:32:24,963 sagemaker_sklearn_container.training INFO     Invoking user training script.
tizv2teaxh-sagemaker-local  | 2024-12-02 06:32:25,304 sagemaker_training.entry_point INFO     Bypass DNS check in case of Studio Local Mode execution.
tizv2teaxh-sagemaker

INFO:root:creating /home/sagemaker-user/tmp/tmpgvmr38ng/artifacts/output/data
INFO:root:copying /home/sagemaker-user/tmp/tmpgvmr38ng/sagemaker-local/output/success -> /home/sagemaker-user/tmp/tmpgvmr38ng/artifacts/output
INFO:root:copying /home/sagemaker-user/tmp/tmpgvmr38ng/model/model.joblib -> /home/sagemaker-user/tmp/tmpgvmr38ng/artifacts/model


tizv2teaxh-sagemaker-local  | 2024-12-02 06:32:27,041 sagemaker-containers INFO     Reporting training SUCCESS
[Ktizv2teaxh-sagemaker-local exited with code 0
Aborting on container exit...
 Container tizv2teaxh-sagemaker-local  Stopping
 Container tizv2teaxh-sagemaker-local  Stopped


INFO:sagemaker.local.image:===== Job Complete =====


In [6]:
# Deploy the trained model
predictor = sklearn_estimator.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
)

INFO:sagemaker:Creating model with name: sagemaker-scikit-learn-2024-12-01-23-30-09-659
INFO:sagemaker:Creating endpoint-config with name sagemaker-scikit-learn-2024-12-01-23-30-09-659
INFO:sagemaker:Creating endpoint with name sagemaker-scikit-learn-2024-12-01-23-30-09-659


------!

In [9]:
# Use the endpoint to make predictions
data = [[859, 3, 3, 1, 0, 0, 2, 10, 8.5]]  # Example input
prediction = predictor.predict(data)
print(f"Prediction: {prediction}")

Prediction: [3155.70433793]


In [1]:
predictor.delete_endpoint()

NameError: name 'predictor' is not defined