### Data Capture Validation

In [3]:
import boto3
import sagemaker

# ✅ Retrieve stored variables
%store -r endpoint_name_single_request
%store -r s3_staging_dir

# ✅ Initialize AWS session
session = boto3.session.Session()
sagemaker_session = sagemaker.Session()

# ✅ Reinitialize `bucket`
bucket = sagemaker_session.default_bucket()

# ✅ Initialize S3 client
s3_client = boto3.client("s3")

# ✅ Define S3 prefix for captured data dynamically
data_capture_prefix = f"data_capture/{endpoint_name_single_request}/AllTraffic/"

# ✅ List objects in the S3 data capture folder
capture_files = s3_client.list_objects_v2(Bucket=bucket, Prefix=data_capture_prefix)

if "Contents" in capture_files:
    print("✅ JSON files available in data capture S3 path:")
    for obj in capture_files["Contents"]:
        print(f"- {obj['Key']}")
else:
    print("⚠️ No JSON files found. Send data to the endpoint for capture.")




sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
⚠️ No JSON files found. Send data to the endpoint for capture.


### Take sample from validation data to send to the predictor endpoint

In [4]:
import pandas as pd

# Load validation.csv
validation_file = "validation.csv"
validation_data = pd.read_csv(validation_file, header=None)

# Display the first few rows to confirm the structure
print(validation_data.head())
print(f"Validation data shape: {validation_data.shape}")

   0             1     2   3   4    5     6    7    8   9   ...   12  13  14  \
0   0  1.739940e+09  2011   4   1  202    62    5    2   0  ...    0   0   1   
1   0  1.739940e+09  2005   8   6   60    93   17   11   0  ...    5   0   0   
2   0  1.739940e+09  2015   4   6   92  3848  602  108  13  ...  279  78  20   
3   0  1.739940e+09  2007  12   0    2   284  100   32   9  ...   32   7   1   
4   0  1.739940e+09  2004  10   6  194  4127  555  241  85  ...  179  67   2   

      15     16    17    18  19     20  21  
0    176    106     0    64   0      6   8  
1    513    323     0    28   0    162  18  
2  37754  11074  1273  6656  10  18741  15  
3   5876   1641   533   947   0   2755  35  
4  25239   9236  6015  2344  19   7625  13  

[5 rows x 22 columns]
Validation data shape: (26102, 22)


In [5]:
# Select a subset of validation data
subset_size = 50  # Number of rows to send to the endpoint
subset_data = validation_data.iloc[:subset_size, 1:]  # Exclude the first column (target variable)

# Convert to CSV-like string for SageMaker endpoint
subset_data_str_list = subset_data.apply(lambda row: ",".join(row.astype(str)), axis=1).tolist()
print(f"Prepared {len(subset_data_str_list)} rows for sending to endpoint.")

Prepared 50 rows for sending to endpoint.


### Create predictor endpoint and send data to endpoint

In [6]:
from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer

# ✅ Retrieve stored endpoint name
%store -r endpoint_name_single_request

# ✅ Initialize the endpoint predictor dynamically
predictor = Predictor(
    endpoint_name=endpoint_name_single_request,  # Now using stored variable
    sagemaker_session=sagemaker.Session(),
    serializer=CSVSerializer(),
    deserializer=JSONDeserializer(),
)

# ✅ Send each row to the endpoint
responses = []
for row_str in subset_data_str_list:
    response = predictor.predict(row_str)  # Send to the endpoint
    responses.append(response)
    print(f"Input: {row_str}")
    print(f"Prediction: {response}")

print("✅ Finished sending all rows.")


Input: 1739940056.2871292,2011.0,4.0,1.0,202.0,62.0,5.0,2.0,0.0,2.0,0.0,0.0,0.0,1.0,176.0,106.0,0.0,64.0,0.0,6.0,8.0
Prediction: {'predictions': [{'score': 1.0493714398762677e-05}]}
Input: 1739940056.2871292,2005.0,8.0,6.0,60.0,93.0,17.0,11.0,0.0,1.0,0.0,5.0,0.0,0.0,513.0,323.0,0.0,28.0,0.0,162.0,18.0
Prediction: {'predictions': [{'score': 1.0493714398762677e-05}]}
Input: 1739940056.2871292,2015.0,4.0,6.0,92.0,3848.0,602.0,108.0,13.0,200.0,0.0,279.0,78.0,20.0,37754.0,11074.0,1273.0,6656.0,10.0,18741.0,15.0
Prediction: {'predictions': [{'score': 4.370238002593396e-06}]}
Input: 1739940056.2871292,2007.0,12.0,0.0,2.0,284.0,100.0,32.0,9.0,25.0,0.0,32.0,7.0,1.0,5876.0,1641.0,533.0,947.0,0.0,2755.0,35.0
Prediction: {'predictions': [{'score': 4.370238002593396e-06}]}
Input: 1739940056.2871292,2004.0,10.0,6.0,194.0,4127.0,555.0,241.0,85.0,49.0,1.0,179.0,67.0,2.0,25239.0,9236.0,6015.0,2344.0,19.0,7625.0,13.0
Prediction: {'predictions': [{'score': 4.370238002593396e-06}]}
Input: 1739940056.28712

### Validate that JSONL files were captured

In [15]:
# ✅ Use already initialized session, bucket, and S3 client
data_capture_prefix = f"data_capture/{endpoint_name_single_request}/AllTraffic/"

# ✅ List files in S3 data capture path
response = s3_client.list_objects_v2(Bucket=bucket, Prefix=data_capture_prefix)

if "Contents" in response:
    print(f"✅ Files found in S3 data capture path ({data_capture_prefix}):")
    for obj in response["Contents"]:
        print(obj["Key"])
else:
    print(f"⚠️ No files found in S3 data capture path: {data_capture_prefix}")


⚠️ No files found in S3 data capture path: data_capture/flight-delay-xgboost-endpoint-single-request/AllTraffic/


#### You should now be able to see your data in cloudwatch
- Go to Cloudwatch -> Logs -> Log Insights
- search `/aws/sagemaker/Endpoints/flight-delay-xgboost-endpoint-single-request`

In [13]:
# Import necessary libraries
import boto3
from sagemaker import get_execution_role, Session

# Initialize the SageMaker session and execution role
sagemaker_session = Session()  # Create a SageMaker session
role_arn = get_execution_role()  # Get the SageMaker execution role
bucket = sagemaker_session.default_bucket()  # Use the default S3 bucket for SageMaker
s3_client = boto3.client('s3')

# Print out the initialized variables
print("SageMaker session initialized.")
print("SageMaker execution role:", role_arn)
print("Default S3 bucket:", bucket)

SageMaker session initialized.
SageMaker execution role: arn:aws:iam::450772039932:role/service-role/AmazonSageMaker-ExecutionRole-20250127T165558
Default S3 bucket: sagemaker-us-east-2-450772039932


## Initialize Baseline Monitoring Setup

In [14]:
from sagemaker.model_monitor import DefaultModelMonitor, DatasetFormat
# Define Required Variables and Initialize SageMaker Session

# Define S3 URIs for baseline results and captured data
baseline_results_uri = f"s3://{bucket}/flight-delay-baseline-results/"
baseline_dataset_path = f"s3://{bucket}/data_capture/flight-delay-xgboost-endpoint-single-request/AllTraffic/"

print("Baseline dataset path:", baseline_dataset_path)
print("Baseline results URI:", baseline_results_uri)

# Initialize the model monitor
model_monitor = DefaultModelMonitor(
    role=role_arn,
    instance_type="ml.m5.xlarge",  # Adjust instance type if needed
    instance_count=1,
    max_runtime_in_seconds=3600,  # Maximum runtime for baseline generation
    sagemaker_session=sagemaker_session
)

Baseline dataset path: s3://sagemaker-us-east-2-450772039932/data_capture/flight-delay-xgboost-endpoint-single-request/AllTraffic/
Baseline results URI: s3://sagemaker-us-east-2-450772039932/flight-delay-baseline-results/


### <span style="color:red">TODO - fix baseline model error</span>
### Run baseline suggestion job
- UnexpectedStatusException: Error for Processing job baseline-suggestion-job-2025-02-17-05-13-52-651: Failed. Reason: AlgorithmError: Error: Could not find any json file under directory /opt/ml/processing/input/baseline_dataset_input. Please verify if the provided dataset path is correct or if data capturing in your Endpoint is turned on., exit code: 255

In [15]:
try:
    print("Running baseline suggestion job...")

    model_monitor.suggest_baseline(
        baseline_dataset=baseline_dataset_path,  # Path containing JSONL files
        dataset_format=DatasetFormat.json(),    # Ensure input matches JSON
        output_s3_uri=baseline_results_uri,     # Where baseline results will be stored
        wait=True                               # Wait for the job to complete
    )

    print(f"Baseline job completed. Results saved at: {baseline_results_uri}")

except Exception as e:
    print(f"Error running baseline suggestion job: {e}")
    raise

INFO:sagemaker:Creating processing-job with name baseline-suggestion-job-2025-02-17-05-13-52-651


Running baseline suggestion job...
...........[34m2025-02-17 05:15:38.248570: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory[0m
[34m2025-02-17 05:15:38.248598: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.[0m
[34m2025-02-17 05:15:39.851076: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory[0m
[34m2025-02-17 05:15:39.851107: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)[0m
[34m2025-02-17 05:15:39.851126: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ip-10-0-178-129.us-east-2.compute.i

UnexpectedStatusException: Error for Processing job baseline-suggestion-job-2025-02-17-05-13-52-651: Failed. Reason: AlgorithmError: Error: Could not find any json file under directory /opt/ml/processing/input/baseline_dataset_input. Please verify if the provided dataset path is correct or if data capturing in your Endpoint is turned on., exit code: 255

### Verify Baseline Results

In [None]:
# Examine results of baseline job
baseline_statistics_path = f"{baseline_results_uri}statistics.json"
baseline_constraints_path = f"{baseline_results_uri}constraints.json"

print("Baseline statistics stored at:", baseline_statistics_path)
print("Baseline constraints stored at:", baseline_constraints_path)

# Download and inspect baseline results
s3_client.download_file(bucket, "flight-delay-baseline-results/statistics.json", "statistics.json")
s3_client.download_file(bucket, "flight-delay-baseline-results/constraints.json", "constraints.json")

import json
with open("statistics.json", "r") as stats_file:
    baseline_statistics = json.load(stats_file)
    print("Sample Baseline Statistics:", json.dumps(baseline_statistics, indent=2))

with open("constraints.json", "r") as constraints_file:
    baseline_constraints = json.load(constraints_file)
    print("Sample Baseline Constraints:", json.dumps(baseline_constraints, indent=2))

### <span style="color:red">TODO - make alert in cloudwatch to trigger</span>
### below - saves random data to endpoint to trigger alert

In [1]:
import boto3
import sagemaker
from sagemaker.model_monitor import DefaultModelMonitor, DatasetFormat, CronExpressionGenerator

# Initialize Session and Define Bucket/Prefix
session = sagemaker.Session()
bucket = session.default_bucket()
prefix = "flight-delay-prediction-xgboost"

# Monitoring and baseline results paths
baseline_results_uri = f"s3://{bucket}/baseline_results/"
monitoring_results_uri = f"s3://{bucket}/monitoring_results/"

# Ensure Data Capture Paths (jsonl paths from captured traffic)
baseline_dataset_path = "s3://{bucket}/data_capture/flight-delay-xgboost-endpoint-single-request/AllTraffic/"

print("Baseline Dataset Path:", baseline_dataset_path)
print("Baseline Results Path:", baseline_results_uri)
print("Monitoring Results Path:", monitoring_results_uri)

# Configure the model monitor
model_monitor = DefaultModelMonitor(
    role=sagemaker.get_execution_role(),
    instance_type="ml.m5.xlarge",
    instance_count=1,
    max_runtime_in_seconds=3600,
    sagemaker_session=session,
)

# Schedule Data Monitoring
monitor_schedule_name = "FlightDelayMonitor"
model_monitor.create_monitoring_schedule(
    monitor_schedule_name=monitor_schedule_name,
    endpoint_input="flight-delay-xgboost-endpoint-single-request",  # Endpoint being monitored
    output_s3_uri=monitoring_results_uri,
    schedule_cron_expression=CronExpressionGenerator.hourly(),
    enable_cloudwatch_metrics=True,
)

print(f"Monitoring schedule created for endpoint: flight-delay-xgboost-endpoint-single-request")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
Baseline Dataset Path: s3://{bucket}/data_capture/flight-delay-xgboost-endpoint-single-request/AllTraffic/
Baseline Results Path: s3://sagemaker-us-east-2-450772039932/baseline_results/
Monitoring Results Path: s3://sagemaker-us-east-2-450772039932/monitoring_results/
Monitoring schedule created for endpoint: flight-delay-xgboost-endpoint-single-request


In [2]:
import pandas as pd
import random

# Feature columns (ensure these match training data)
columns = [
    "year", "month", "day", "carrier", "airport", 
    "arr_flights", "arr_del15", "carrier_ct", "weather_ct", 
    "nas_ct", "security_ct", "late_aircraft_ct", "arr_cancelled", 
    "arr_diverted", "arr_delay", "carrier_delay", "weather_delay", 
    "nas_delay", "security_delay", "late_aircraft_delay", "delay_rate"
]

# Generate random fake data
def generate_fake_data(num_rows=100):
    fake_data = []
    for _ in range(num_rows):
        fake_row = [
            random.randint(2000, 2023),  # year
            random.randint(1, 12),  # month
            random.randint(1, 28),  # day
            random.choice(["AA", "DL", "UA", "SW", "AS", "EV"]),  # carrier
            random.choice(["JFK", "LAX", "ORD", "ATL", "DFW", "DEN"]),  # airport
            random.randint(50, 1000),  # arr_flights
            random.randint(0, 200),  # arr_del15
            random.randint(0, 50),  # carrier_ct
            random.randint(0, 50),  # weather_ct
            random.randint(0, 50),  # nas_ct
            random.randint(0, 50),  # security_ct
            random.randint(0, 50),  # late_aircraft_ct
            random.randint(0, 10),  # arr_cancelled
            random.randint(0, 10),  # arr_diverted
            random.uniform(0, 300),  # arr_delay
            random.uniform(0, 100),  # carrier_delay
            random.uniform(0, 100),  # weather_delay
            random.uniform(0, 100),  # nas_delay
            random.uniform(0, 100),  # security_delay
            random.uniform(0, 100),  # late_aircraft_delay
            random.uniform(1.0, 2.0),  # delay_rate
        ]
        fake_data.append(fake_row)
    return pd.DataFrame(fake_data, columns=columns)

# Write fake data to CSV
fake_data = generate_fake_data(500)  # Generate 500 records
fake_data_file = "fake_flight_data.csv"
fake_data.to_csv(fake_data_file, index=False, header=False)
print(f"Fake data generated and saved to {fake_data_file}")

Fake data generated and saved to fake_flight_data.csv


### Clean up script

In [17]:
import boto3
from time import sleep

# Initialize SageMaker client
sagemaker_client = boto3.client("sagemaker")

# Define resources to clean up
endpoint_name = "flight-delay-xgboost-endpoint-single-request"
monitoring_schedule_name = "ModelMonitorForXGBoost"  # Adjust if the schedule name is different
endpoint_config_name = f"{endpoint_name}-config"

# Step 1: Delete Monitoring Schedules
try:
    # List all monitoring schedules associated with the endpoint
    print(f"Listing monitoring schedules for endpoint: {endpoint_name}")
    monitoring_schedules = sagemaker_client.list_monitoring_schedules()['MonitoringScheduleSummaries']
    for schedule in monitoring_schedules:
        if schedule['EndpointName'] == endpoint_name:
            print(f"Deleting monitoring schedule: {schedule['MonitoringScheduleName']}")
            sagemaker_client.delete_monitoring_schedule(MonitoringScheduleName=schedule['MonitoringScheduleName'])
            sleep(10)  # Allow time for the monitoring schedule deletion process
    print("All monitoring schedules deleted.")
except sagemaker_client.exceptions.ResourceNotFound as e:
    print("Monitoring schedule not found. Skipping deletion.")
except Exception as e:
    print(f"Error while deleting monitoring schedules: {e}")
    raise

# Step 2: Delete Endpoint
try:
    print(f"Deleting endpoint: {endpoint_name}")
    sagemaker_client.delete_endpoint(EndpointName=endpoint_name)
    print(f"Endpoint '{endpoint_name}' deleted.")
except sagemaker_client.exceptions.ResourceNotFound as e:
    print(f"Endpoint '{endpoint_name}' not found. Skipping deletion.")
except Exception as e:
    print(f"Error while deleting endpoint: {e}")
    raise

# Step 3: Delete Endpoint Configuration
try:
    print(f"Deleting endpoint configuration: {endpoint_config_name}")
    sagemaker_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)
    print(f"Endpoint configuration '{endpoint_config_name}' deleted.")
except sagemaker_client.exceptions.ResourceNotFound as e:
    print(f"Endpoint configuration '{endpoint_config_name}' not found. Skipping deletion.")
except Exception as e:
    print(f"Error while deleting endpoint configuration: {e}")
    raise

Listing monitoring schedules for endpoint: flight-delay-xgboost-endpoint-single-request
Deleting monitoring schedule: FlightDelayMonitor
All monitoring schedules deleted.
Deleting endpoint: flight-delay-xgboost-endpoint-single-request
Endpoint 'flight-delay-xgboost-endpoint-single-request' deleted.
Deleting endpoint configuration: flight-delay-xgboost-endpoint-single-request-config
Error while deleting endpoint configuration: An error occurred (ValidationException) when calling the DeleteEndpointConfig operation: Could not find endpoint configuration "flight-delay-xgboost-endpoint-single-request-config".


ClientError: An error occurred (ValidationException) when calling the DeleteEndpointConfig operation: Could not find endpoint configuration "flight-delay-xgboost-endpoint-single-request-config".