In [None]:
# Uncomment to install boto3 into the kernel's environment:
# %pip install boto3

In [None]:
# Uncomment to install the SageMaker Python SDK into the kernel's environment:
# %pip install sagemaker

In [None]:

import boto3
import sagemaker
from sagemaker.sklearn.estimator import SKLearn
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import os

In [None]:
# Initialize the SageMaker session, the execution role, and the S3 locations
# used by the rest of the notebook.
sagemaker_session = sagemaker.Session()

# The execution role ARN is account-specific. It can be overridden by setting
# the SAGEMAKER_ROLE_ARN environment variable; when that variable is unset or
# empty, the role below is used.
# Alternative (previously commented out here): sagemaker.get_execution_role()
role = os.environ.get("SAGEMAKER_ROLE_ARN") or (
    "arn:aws:iam::069641793383:role/service-role/"
    "AmazonSageMaker-ExecutionRole-20241128T231146"
)

# Default bucket of the session; the CSV uploads below go under `prefix`.
bucket = sagemaker_session.default_bucket()
prefix = "sagemaker/california-housing"

In [None]:
# Fetch the California Housing dataset (as_frame=True) and unpack its
# `data` and `target` attributes into X and y.
california_data = fetch_california_housing(as_frame=True)
X, y = california_data.data, california_data.target

In [None]:
# Hold out 20% of the rows for testing; random_state is fixed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Build one frame per split by placing the target column after the feature
# columns (the frames are written to CSV in the next cell).
train_data = pd.concat((X_train, y_train), axis="columns")
test_data = pd.concat((X_test, y_test), axis="columns")

In [None]:
# Write both splits to CSV files in the working directory, omitting the
# DataFrame index.
train_data_path, test_data_path = "train.csv", "test.csv"
train_data.to_csv(path_or_buf=train_data_path, index=False)
test_data.to_csv(path_or_buf=test_data_path, index=False)

In [None]:
# Upload both CSV files to the session bucket under the shared key prefix and
# keep the value returned by each upload.
train_data_s3_path = sagemaker_session.upload_data(
    path=train_data_path,
    bucket=bucket,
    key_prefix=prefix,
)
test_data_s3_path = sagemaker_session.upload_data(
    path=test_data_path,
    bucket=bucket,
    key_prefix=prefix,
)

In [None]:
# Report where each uploaded file ended up, one location per line.
print(
    f"Training data uploaded to: {train_data_s3_path}",
    f"Test data uploaded to: {test_data_s3_path}",
    sep="\n",
)

In [None]:
# Name of the training script handed to the SKLearn estimator as its entry
# point (the estimator itself is created in the next cell).
script_path = "training-script.py"

In [None]:
# Write the training job's output to the same bucket the training data was
# uploaded to, instead of a hard-coded region/account-specific bucket name.
output_path = f"s3://{bucket}/sagemaker/output"

# Configure the scikit-learn estimator: it runs `script_path` on a single
# ml.m5.large instance and passes max_depth=10 as a hyperparameter.
sklearn_estimator = SKLearn(
    entry_point=script_path,
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    framework_version="1.2-1",
    sagemaker_session=sagemaker_session,
    output_path=output_path,
    hyperparameters={"max_depth": 10},
)

In [None]:
# Launch the training job with the uploaded training CSV as the "train"
# channel (the uploaded test CSV is not passed to the job).
sklearn_estimator.fit(inputs={"train": train_data_s3_path})

In [None]:
# Deploy the trained model to an endpoint with a fixed name, backed by one
# ml.m5.large instance, and keep the returned predictor for inference.
predictor = sklearn_estimator.deploy(
    endpoint_name="california-housing-endpoint-2",
    instance_type="ml.m5.large",
    initial_instance_count=1,
)

In [None]:
# Smoke-test the endpoint: send the first held-out row (as a NumPy array)
# and print whatever the predictor returns.
test_sample = X_test.head(1)
response = predictor.predict(test_sample.to_numpy())
print(f"Prediction: {response}")

In [None]:
import boto3

sagemaker_client = boto3.client("sagemaker")

# Delete the existing endpoint configuration.
# NOTE(review): the endpoint deployed earlier in this notebook is named
# "california-housing-endpoint-2"; this cell targets a different name and does
# not delete that endpoint. Confirm which resources are meant to be cleaned up.
stale_config_name = "california-housing-endpoint"
try:
    sagemaker_client.delete_endpoint_config(EndpointConfigName=stale_config_name)
except sagemaker_client.exceptions.ClientError as err:
    # Make the cleanup safe to re-run: a configuration that is already gone is
    # not an error for this cell. Anything else is re-raised unchanged.
    # presumably SageMaker reports a missing configuration as a
    # ValidationException whose message starts with "Could not find" — verify.
    error_info = err.response.get("Error", {})
    already_gone = (
        error_info.get("Code") == "ValidationException"
        and "Could not find" in error_info.get("Message", "")
    )
    if not already_gone:
        raise
    print(f"Endpoint configuration {stale_config_name!r} not found; nothing to delete.")
