# Retrieving the Last Completed Training Job

In [None]:
import sagemaker

# Open a SageMaker session; its low-level client performs the job listing below
sagemaker_session = sagemaker.Session()

# Listing parameters: only completed jobs whose name contains
# 'sagemaker-scikit-learn', ordered by creation time with the newest first
job_query = {
    'SortBy': 'CreationTime',
    'SortOrder': 'Descending',
    'StatusEquals': 'Completed',
    'NameContains': 'sagemaker-scikit-learn',
}
training_jobs = sagemaker_session.sagemaker_client.list_training_jobs(**job_query)

# Show the raw response returned by the listing call
print(training_jobs)


In [None]:
# Pick the first summary from the listing response; the request asked for
# descending creation time, so the first entry is the newest completed job.
# Fail with a descriptive message when the filter matched nothing, instead of
# surfacing a bare IndexError from the [0] lookup.
job_summaries = training_jobs.get('TrainingJobSummaries', [])
if not job_summaries:
    raise RuntimeError(
        "list_training_jobs returned no completed training jobs matching the filter."
    )
TRAINING_JOB_NAME = job_summaries[0]['TrainingJobName']
print(f"Latest completed training job: {TRAINING_JOB_NAME}")


# Reconnecting to a Training Job

In [None]:
from sagemaker.sklearn.estimator import SKLearn

# Rebuild an SKLearn estimator object bound to the training job selected earlier
estimator = SKLearn.attach(TRAINING_JOB_NAME)
attached_job_name = estimator.training_job_name
print(f"Attached to training job: {attached_job_name}")


# Getting the Model Location from the Estimator


In [None]:
# Read the S3 location of the model artifact from the attached estimator
model_s3_uri = estimator.model_data
artifact_message = f"Model artifacts are located at: {model_s3_uri}"
print(artifact_message)


# Downloading Model Artifacts from S3


In [None]:
from sagemaker.s3 import S3Downloader

# Copy the artifact at model_s3_uri into the current working directory
S3Downloader.download(s3_uri=model_s3_uri, local_path=".")


# Extracting and Loading the Trained Model


In [None]:
import tarfile
import joblib

# Unpack the downloaded archive into the current directory. Where the running
# Python supports extraction filters, use the "data" filter so archive members
# cannot be written outside the target directory; on older interpreters fall
# back to the plain extractall() call.
with tarfile.open("model.tar.gz", 'r:gz') as tar:
    if hasattr(tarfile, "data_filter"):
        tar.extractall(".", filter="data")
    else:
        tar.extractall(".")

# Load the trained model.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load artifacts produced by a training job you trust.
model = joblib.load("model.joblib")
print("Model loaded successfully.")


In [None]:
# Show the fitted parameters: coefficients (coef_) first, then the intercept (intercept_)
for label, attribute in (("Model Coefficients:", "coef_"), ("Model Intercept:", "intercept_")):
    print(label, getattr(model, attribute))


# Preparing and Loading Test Data


In [None]:
import pandas as pd

# CSV file holding the test data
TEST_DATA_FILE = "data/california_housing_test.csv"

# Read the CSV, then split it into the feature columns (X_test) and the
# target column (y_test)
target_column = "MedHouseVal"
test_df = pd.read_csv(TEST_DATA_FILE)
X_test = test_df.drop(columns=target_column)
y_test = test_df[target_column]
print("Test data prepared successfully.")


# Evaluating the Model's Performance


In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Predict the target for every row of the test features
y_pred = model.predict(X_test)

# Score the predictions: R² directly, RMSE as the square root of the MSE
test_r2 = r2_score(y_test, y_pred)
test_mse = mean_squared_error(y_test, y_pred)
test_rmse = np.sqrt(test_mse)

# Report both metrics to four decimal places
print(f"R² Score: {test_r2:.4f}")
print(f"RMSE: {test_rmse:.4f}")
print("Model evaluation completed.")


# Testing the Model with a Single Sample


In [None]:
# Feature values describing a single house, keyed by column name
sample_features = {
    'MedInc': 4.8036,
    'HouseAge': 4.0,
    'AveRooms': 3.9246575342465753,
    'AveBedrms': 1.0359589041095891,
    'Population': 1050.0,
    'AveOccup': 1.797945205479452,
    'Latitude': 37.39,
    'Longitude': -122.08,
    'RoomsPerHousehold': 2.182857142857143,
}

# Wrapping the record in a list makes pandas build a one-row DataFrame
sample = pd.DataFrame([sample_features])

# Predict, then multiply by 100,000 to express the prediction as a dollar amount
prediction = model.predict(sample)
predicted_dollars = prediction[0] * 100000
print(f"Predicted house value: ${predicted_dollars:,.2f}")
