## Predictive Maintenance Pipeline

In [2]:
import json
import time
import boto3
import string
import sagemaker
import pandas as pd
import numpy as np
import awswrangler as wr

from sagemaker.feature_store.feature_group import FeatureGroup
from sagemaker.workflow.pipeline_context import PipelineSession

In [3]:
region = sagemaker.Session().boto_region_name
print("Using AWS Region: {}".format(region))

Using AWS Region: us-east-1


In [4]:
boto3.setup_default_session(region_name = region)
boto_session = boto3.Session(region_name = region)

s3_client = boto3.client("s3", region_name = region)

sagemaker_boto_client = boto_session.client("sagemaker")
sagemaker_session = sagemaker.session.Session(
    boto_session = boto_session, sagemaker_client = sagemaker_boto_client
)
account_id = boto3.client("sts").get_caller_identity()["Account"]
sagemaker_role = sagemaker.get_execution_role()
pipeline_session = PipelineSession()

In [5]:
sagemaker.get_execution_role()

'arn:aws:iam::451633145432:role/service-role/AmazonSageMaker-ExecutionRole-20220302T144665'

In [6]:
bucket = "ideaaiml-demo"
prefix = "mlops/predictive-maintenance"

### Upload raw data to S3

In [13]:
s3_client.upload_file(
    Filename="datasets/PdM_telemetry.csv", Bucket=bucket, Key=f"{prefix}/data/raw/PdM_telemetry.csv"
)
s3_client.upload_file(
    Filename="datasets/PdM_errors.csv", Bucket=bucket, Key=f"{prefix}/data/raw/PdM_errors.csv"
)
s3_client.upload_file(
    Filename="datasets/PdM_maint.csv", Bucket=bucket, Key=f"{prefix}/data/raw/PdM_maint.csv"
)
s3_client.upload_file(
    Filename="datasets/PdM_failures.csv", Bucket=bucket, Key=f"{prefix}/data/raw/PdM_failures.csv"
)
s3_client.upload_file(
    Filename="datasets/PdM_machines.csv", Bucket=bucket, Key=f"{prefix}/data/raw/PdM_machines.csv"
)

In [229]:
!pip install s3fs

Collecting botocore<1.24.22,>=1.24.21 (from aiobotocore~=2.3.4->s3fs)
  Using cached botocore-1.24.21-py3-none-any.whl (8.6 MB)
Installing collected packages: botocore
  Attempting uninstall: botocore
    Found existing installation: botocore 1.29.112
    Uninstalling botocore-1.29.112:
      Successfully uninstalled botocore-1.29.112
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
awscli 1.25.63 requires botocore==1.27.62, but you have botocore 1.24.21 which is incompatible.
awscli 1.25.63 requires rsa<4.8,>=3.1.2, but you have rsa 4.9 which is incompatible.
awswrangler 2.20.1 requires botocore<2.0.0,>=1.27.11, but you have botocore 1.24.21 which is incompatible.
boto3 1.26.97 requires botocore<1.30.0,>=1.29.97, but you have botocore 1.24.21 which is incompatible.[0m[31m
[0mSuccessfully installed botocore-1.24.21
[0m

### Read Data from S3

In [224]:
telemetry_data_uri = f"s3://{bucket}/{prefix}/data/raw/PdM_telemetry.csv"
errors_data_uri = f"s3://{bucket}/{prefix}/data/raw/PdM_errors.csv"
maint_data_uri = f"s3://{bucket}/{prefix}/data/raw/PdM_maint.csv"
failures_data_uri = f"s3://{bucket}/{prefix}/data/raw/PdM_failures.csv"
machines_data_uri = f"s3://{bucket}/{prefix}/data/raw/PdM_machines.csv"

In [73]:
response = s3_client.get_object(Bucket = bucket, Key = f"{prefix}/data/raw/PdM_telemetry.csv")
telemetry = pd.read_csv(response.get("Body"))
telemetry.head()

Unnamed: 0,datetime,machineID,volt,rotate,pressure,vibration
0,2015-01-01 06:00:00,1,176.217853,418.504078,113.077935,45.087686
1,2015-01-01 07:00:00,1,162.879223,402.74749,95.460525,43.413973
2,2015-01-01 08:00:00,1,170.989902,527.349825,75.237905,34.178847
3,2015-01-01 09:00:00,1,162.462833,346.149335,109.248561,41.122144
4,2015-01-01 10:00:00,1,157.610021,435.376873,111.886648,25.990511


In [230]:
telemetry = pd.read_csv(telemetry_data_uri)
# errors = wr.s3.read_csv(errors_data_uri)
# maint = wr.s3.read_csv(maint_data_uri)
# failures = wr.s3.read_csv(failures_data_uri)
# machines = wr.s3.read_csv(machines_data_uri)

PermissionError: Forbidden

### Define Parameters to Parametrize Pipeline Execution

In [7]:
from sagemaker.workflow.parameters import (
    ParameterInteger,
    ParameterString,
    ParameterFloat,
)

In [8]:
processing_instance_count = ParameterInteger(name = "ProcessingInstanceCount", default_value = 1)
instance_type = ParameterString(name = "TrainingInstanceType", default_value = "ml.m5.xlarge")

model_approval_status = ParameterString(
    name = "ModelApprovalStatus", default_value = "PendingManualApproval"
)

### Define a Processing Step for Feature Engineering

### Preprocessing Script

In [9]:
%%writefile scripts/preprocessing.py
import numpy as np
import pandas as pd
import boto3
from io import StringIO
import awswrangler as wr

base_dir = "/opt/ml/processing"
bucket = "ideaaiml-demo"
prefix = "mlops/predictive-maintenance"

def upload_file_s3(df, name):
    boto3.setup_default_session(region_name = "us-east-1")
    s3_client = boto3.client("s3", region_name = "us-east-1")
    with StringIO() as csv_buffer:
        df.to_csv(csv_buffer, index = False)

        response = s3_client.put_object(
            Bucket = bucket, Key = f"{prefix}/data/preprocessed/{name}.csv", Body = csv_buffer.getvalue()
        )
        status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")

        if status == 200:
            print(f"Successful S3 put_object response. Status - {status}")
        else:
            print(f"Unsuccessful S3 put_object response. Status - {status}")

# Convert to datetime datatype
def datetime_datatype(df):
    print("Converting to type datetime")
    df['datetime'] = pd.to_datetime(df['datetime'], format="%Y-%m-%d %H:%M:%S")
    return df


# Convert to category datatype
def category_datatype(df, column_name):
    print("Converting to type category")
    df[column_name] = df[column_name].astype('category')
    return df


# Lag Features from Telemetry
def telemetry_features(df):
    df = datetime_datatype(df)
    # Calculate mean values for telemetry features -- 3 hours rolling window
    print("Calculate mean values for telemetry features -- 3 hours rolling window")
    temp = []
    fields = ['volt', 'rotate', 'pressure', 'vibration']
    for col in fields:
        temp.append(pd.pivot_table(df,
                                   index = 'datetime',
                                   columns = 'machineID',
                                   values = col).resample('3H', closed = 'left', label = 'right').mean().unstack())
    telemetry_mean_3h = pd.concat(temp, axis = 1)
    telemetry_mean_3h.columns = [i + 'mean_3h' for i in fields]
    telemetry_mean_3h.reset_index(inplace = True)

    # repeat for standard deviation
    print("Calculate standard deviation for telemetry features -- 3 hours rolling window")
    temp = []
    for col in fields:
        temp.append(pd.pivot_table(df,
                                   index = 'datetime',
                                   columns = 'machineID',
                                   values = col).resample('3H', closed = 'left', label = 'right').std().unstack())
    telemetry_sd_3h = pd.concat(temp, axis = 1)
    telemetry_sd_3h.columns = [i + 'sd_3h' for i in fields]
    telemetry_sd_3h.reset_index(inplace = True)
    
    # Calculate mean values for telemetry features -- 24 hours rolling window
    print("Calculate mean values for telemetry features -- 24 hours rolling window")
    temp = []
    fields = ['volt', 'rotate', 'pressure', 'vibration']
    for col in fields:
        temp.append(pd.pivot_table(df,
                                   index = 'datetime',
                                   columns = 'machineID',
                                   values = col)
                    .resample('3H', closed = 'left', label = 'right')
                    .first()
                    .unstack()
                    .rolling(window = 24, center = False).mean())
    telemetry_mean_24h = pd.concat(temp, axis = 1)
    telemetry_mean_24h.columns = [i + 'mean_24h' for i in fields]
    telemetry_mean_24h.reset_index(inplace = True)
    telemetry_mean_24h = telemetry_mean_24h.loc[-telemetry_mean_24h['voltmean_24h'].isnull()]

    # repeat for standard deviation
    print("Calculate standard deviation for telemetry features -- 24 hours rolling window")
    temp = []
    fields = ['volt', 'rotate', 'pressure', 'vibration']
    for col in fields:
        temp.append(pd.pivot_table(df,
                                   index = 'datetime',
                                   columns = 'machineID',
                                   values = col)
                    .resample('3H', closed='left', label='right')
                    .first()
                    .unstack()
                    .rolling(window = 24, center = False).std())
    telemetry_sd_24h = pd.concat(temp, axis = 1)
    telemetry_sd_24h.columns = [i + 'sd_24h' for i in fields]
    telemetry_sd_24h = telemetry_sd_24h.loc[-telemetry_sd_24h['voltsd_24h'].isnull()]
    telemetry_sd_24h.reset_index(inplace = True)
    
    telemetry_feat = pd.concat([telemetry_mean_3h,
                            telemetry_sd_3h.iloc[:, 2:6],
                            telemetry_mean_24h.iloc[:, 2:6],
                            telemetry_sd_24h.iloc[:, 2:6]], axis = 1).dropna()

    upload_file_s3(telemetry_feat, "telemetry")
    
    return telemetry_feat


# Lag Features for Errors
def errors_lag_features(df):
    df = datetime_datatype(df)
    df = category_datatype(df, 'errorID')
    print("Lag features for errors")
    error_count = pd.get_dummies(df.set_index('datetime')).reset_index()
    error_count.columns = ['datetime', 'machineID', 'error1', 'error2', 'error3', 'error4', 'error5']
    error_count = error_count.groupby(['machineID', 'datetime']).sum().reset_index()
    error_count = telemetry[['datetime', 'machineID']].merge(error_count, on = ['machineID', 'datetime'], how = 'left').fillna(0.0)
    temp = []
    fields = ['error%d' % i for i in range(1, 6)]
    for col in fields:
        temp.append(pd.pivot_table(error_count,
                                   index = 'datetime',
                                   columns = 'machineID',
                                   values = col)
                    .resample('3H', closed='left', label='right')
                    .first()
                    .unstack()
                    .rolling(window = 24, center = False).sum())
    error_count = pd.concat(temp, axis = 1)
    error_count.columns = [i + 'count' for i in fields]
    error_count.reset_index(inplace = True)
    error_count = error_count.dropna()
    
    upload_file_s3(error_count, "errors")
    
    return error_count


# Maintenance Features
def maintenance_features(df):
    df = datetime_datatype(df)
    df = category_datatype(df, 'comp')
    print("Maintenance Features -- Days since last replacement")
    comp_rep = pd.get_dummies(df.set_index('datetime')).reset_index()
    comp_rep.columns = ['datetime', 'machineID', 'comp1', 'comp2', 'comp3', 'comp4']

    # combine repairs for a given machine in a given hour
    comp_rep = comp_rep.groupby(['machineID', 'datetime']).sum().reset_index()

    # add timepoints where no components were replaced
    comp_rep = telemetry[['datetime', 'machineID']].merge(comp_rep,
                                                          on=['datetime', 'machineID'],
                                                          how='outer').fillna(0).sort_values(by=['machineID', 'datetime'])
    components = ['comp1', 'comp2', 'comp3', 'comp4']
    for comp in components:
        comp_rep.loc[comp_rep[comp] < 1, comp] = None
        comp_rep.loc[-comp_rep[comp].isnull(),
                     comp] = comp_rep.loc[-comp_rep[comp].isnull(), 'datetime']
        comp_rep[comp] = comp_rep[comp].fillna(method = 'ffill')

    comp_rep = comp_rep.loc[comp_rep['datetime'] > pd.to_datetime('2015-01-01')]
    for comp in components:
        comp_rep[comp] = (comp_rep["datetime"] - pd.to_datetime(comp_rep[comp])) / np.timedelta64(1, "D")
        
    upload_file_s3(comp_rep, "maint")
    
    return comp_rep


# Failures Features
def failure_features(df):
    print("Failure features")
    df = datetime_datatype(df)
    df = category_datatype(df, 'failure')
    upload_file_s3(df, "failures")
    return df


# Final Features
def final_features(telemetry_df, errors_df, maint_df, machines_df):
    upload_file_s3(machines_df, "machines")
    print("Final features")
    final_feat = telemetry_df.merge(errors_df, on = ['datetime', 'machineID'], how = 'left')
    final_feat = final_feat.merge(maint_df, on = ['datetime', 'machineID'], how = 'left')
    final_feat = final_feat.merge(machines_df, on = ['machineID'], how = 'left')
    return final_feat


# Label Construction
def label_construct(tele_df, error_df, maint_df, machine_df, failure_df):
    print("----- Final Features -----")
    final_feat = final_features(tele_df, error_df, maint_df, machine_df)
    
    print("----- Label Construction -----")
    labeled_features = pd.DataFrame()
    labeled_features = final_feat.merge(
        failure_df, on = ['datetime', 'machineID'], how = 'left')
    labeled_features['failure'] = labeled_features['failure'].astype(str)
    labeled_features['failure'] = labeled_features['failure'].fillna(method = 'bfill', limit = 7)
    labeled_features['failure'] = labeled_features['failure'].replace('nan', 'none')
    print("----- Preprocessing completed -----")
    
    upload_file_s3(labeled_features, "preprocessed")
#     pd.DataFrame(labeled_features).to_csv(f"{base_dir}/preprocessed/final_data.csv", index = False)


if __name__ == "__main__":

    telemetry_data_uri = f"s3://{bucket}/{prefix}/data/raw/PdM_telemetry.csv"
    errors_data_uri = f"s3://{bucket}/{prefix}/data/raw/PdM_errors.csv"
    maint_data_uri = f"s3://{bucket}/{prefix}/data/raw/PdM_maint.csv"
    failures_data_uri = f"s3://{bucket}/{prefix}/data/raw/PdM_failures.csv"
    machines_data_uri = f"s3://{bucket}/{prefix}/data/raw/PdM_machines.csv"
    
    telemetry = wr.s3.read_csv(telemetry_data_uri)
    errors = wr.s3.read_csv(errors_data_uri)
    maint = wr.s3.read_csv(maint_data_uri)
    failures = wr.s3.read_csv(failures_data_uri)
    machines = wr.s3.read_csv(machines_data_uri)
    
    telemetry_df = telemetry_features(telemetry)
    errors_df = errors_lag_features(errors)
    maint_df = maintenance_features(maint)
    failures_df = failure_features(failures)
    machines_df = category_datatype(machines, 'model')
    
    label_construct(telemetry_df, errors_df, maint_df, machines_df, failures_df)

Overwriting scripts/preprocessing.py


### Feature Store Creation Script

In [10]:
%%writefile scripts/featurestore.py
import numpy as np
import pandas as pd
import boto3
import sagemaker
from sagemaker.feature_store.feature_group import FeatureGroup
import awswrangler as wr

base_dir = "/opt/ml/processing"
bucket = "ideaaiml-demo"
prefix = "mlops/predictive-maintenance"

boto_session = boto3.Session(region_name = "us-east-1")
sagemaker_boto_client = boto_session.client("sagemaker")
featurestore_runtime = boto_session.client(
    service_name = "sagemaker-featurestore-runtime", region_name = "us-east-1"
)
try:
    sagemaker_role = sagemaker.get_execution_role()
    print(f"Sagemaker Role for Feature Store file: {sagemaker_role}")
except ValueError:
    sagemaker_role = 'arn:aws:iam::451633145432:role/service-role/AmazonSageMaker-ExecutionRole-20220302T144665'
    
feature_store_session = sagemaker.Session(
    boto_session = boto_session,
    sagemaker_client = sagemaker_boto_client,
    sagemaker_featurestore_runtime_client = featurestore_runtime,
)

# ------------------------------------ Read Data
telemetry_data_uri = f"s3://{bucket}/{prefix}/data/preprocessed/telemetry.csv"
errors_data_uri = f"s3://{bucket}/{prefix}/data/preprocessed/errors.csv"
maint_data_uri = f"s3://{bucket}/{prefix}/data/preprocessed/maint.csv"
failures_data_uri = f"s3://{bucket}/{prefix}/data/preprocessed/failures.csv"
machines_data_uri = f"s3://{bucket}/{prefix}/data/preprocessed/machines.csv"

telemetry = wr.s3.read_csv(telemetry_data_uri)
errors = wr.s3.read_csv(errors_data_uri)
maint = wr.s3.read_csv(maint_data_uri)
failures = wr.s3.read_csv(failures_data_uri)
machines = wr.s3.read_csv(machines_data_uri)

# ------------------------------------ Add Timestamp
telemetry['event_time'] = pd.to_datetime("now").timestamp()
errors['event_time'] = pd.to_datetime("now").timestamp()
maint['event_time'] = pd.to_datetime("now").timestamp()
failures['event_time'] = pd.to_datetime("now").timestamp()
machines['event_time'] = pd.to_datetime("now").timestamp()

# ------------------------------------ Create Feature Group
telemetry_feature_group = FeatureGroup(name = 'telemetry_fg', sagemaker_session = feature_store_session)
errors_feature_group = FeatureGroup(name = 'errors_fg', sagemaker_session = feature_store_session)
maintenance_feature_group = FeatureGroup(name = 'maintenance_fg', sagemaker_session = feature_store_session)
failures_feature_group = FeatureGroup(name = 'failures_fg', sagemaker_session = feature_store_session)
machines_feature_group = FeatureGroup(name = 'machines_fg', sagemaker_session = feature_store_session)

# ------------------------------------ Loading Definitions
telemetry_feature_group.load_feature_definitions(data_frame = telemetry)
errors_feature_group.load_feature_definitions(data_frame = errors)
maintenance_feature_group.load_feature_definitions(data_frame = maint)
failures_feature_group.load_feature_definitions(data_frame = failures)
machines_feature_group.load_feature_definitions(data_frame = machines)

record_identifier_feature_name = "machineID"
event_time_feature_name = "event_time"

# ------------------------------------ Telemetry Feature Store
try:
    telemetry_feature_group.create(
        s3_uri = f"s3://{bucket}/{prefix}/feature_store_data",
        record_identifier_name = record_identifier_feature_name,
        event_time_feature_name = event_time_feature_name,
        role_arn = sagemaker_role,
        enable_online_store = True,
    )
    time.sleep(30)
    print(f'Create "telemetry" feature group: SUCCESS')
except Exception as e:
    code = e.response.get("Error").get("Code")
    if code == "ResourceInUse":
        print("Using existing feature group")
    else:
        raise (e)

# ------------------------------------ Errors Feature Store      
try:
    errors_feature_group.create(
        s3_uri = f"s3://{bucket}/{prefix}/feature_store_data",
        record_identifier_name = record_identifier_feature_name,
        event_time_feature_name = event_time_feature_name,
        role_arn = sagemaker_role,
        enable_online_store = True,
    )
    time.sleep(30)
    print(f'Create "errors" feature group: SUCCESS')
except Exception as e:
    code = e.response.get("Error").get("Code")
    if code == "ResourceInUse":
        print("Using existing feature group")
    else:
        raise (e)

# ------------------------------------ Maintenance Feature Store
try:
    maintenance_feature_group.create(
        s3_uri = f"s3://{bucket}/{prefix}/feature_store_data",
        record_identifier_name = record_identifier_feature_name,
        event_time_feature_name = event_time_feature_name,
        role_arn = sagemaker_role,
        enable_online_store = True,
    )
    time.sleep(30)
    print(f'Create "maintenance" feature group: SUCCESS')
except Exception as e:
    code = e.response.get("Error").get("Code")
    if code == "ResourceInUse":
        print("Using existing feature group")
    else:
        raise (e)

# ------------------------------------ Failures Feature Store
try:
    failures_feature_group.create(
        s3_uri = f"s3://{bucket}/{prefix}/feature_store_data",
        record_identifier_name = record_identifier_feature_name,
        event_time_feature_name = event_time_feature_name,
        role_arn = sagemaker_role,
        enable_online_store = True,
    )
    time.sleep(30)
    print(f'Create "failures" feature group: SUCCESS')
except Exception as e:
    code = e.response.get("Error").get("Code")
    if code == "ResourceInUse":
        print("Using existing feature group")
    else:
        raise (e)

# ------------------------------------ Machines Feature Store
try:
    machines_feature_group.create(
        s3_uri = f"s3://{bucket}/{prefix}/feature_store_data",
        record_identifier_name = record_identifier_feature_name,
        event_time_feature_name = event_time_feature_name,
        role_arn = sagemaker_role,
        enable_online_store = True,
    )
    time.sleep(30)
    print(f'Create "machines" feature group: SUCCESS')
except Exception as e:
    code = e.response.get("Error").get("Code")
    if code == "ResourceInUse":
        print("Using existing feature group")
    else:
        raise (e)

# ------------------------------------ Ingesting Data
while (
    telemetry_feature_group.describe()['FeatureGroupStatus'] == 'Creating'):
    print("Feature Group Creating")
    time.sleep(60)
else:
    print("Feature Group Created")
    ## Below code needs to run only once to ingest the data into feature store
    #---------------------------------------------------------------
#     telemetry_feature_group.ingest(data_frame = telemetry, max_workers = 3, wait = True)
#     errors_feature_group.ingest(data_frame = errors, max_workers = 3, wait = True)
#     maintenance_feature_group.ingest(data_frame = maint, max_workers = 3, wait = True)
#     failures_feature_group.ingest(data_frame = failures, max_workers = 3, wait = True)
#     machines_feature_group.ingest(data_frame = machines, max_workers = 3, wait = True)
#     print("Feature Data Ingested")
    #---------------------------------------------------------------

Overwriting scripts/featurestore.py


### Train Test Split Script

In [11]:
%%writefile scripts/train_test_split_data.py
import pandas as pd
import boto3
from io import StringIO
import awswrangler as wr

base_dir = "/opt/ml/processing"
bucket = "ideaaiml-demo"
prefix = "mlops/predictive-maintenance"

def upload_file_s3(df, name):
    boto3.setup_default_session(region_name = "us-east-1")
    s3_client = boto3.client("s3", region_name = "us-east-1")
    with StringIO() as csv_buffer:
        df.to_csv(csv_buffer, index = False)

        response = s3_client.put_object(
            Bucket = bucket, Key = f"{prefix}/data/train-test/{name}.csv", Body = csv_buffer.getvalue()
        )
        status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")

        if status == 200:
            print(f"Successful S3 put_object response. Status - {status}")
        else:
            print(f"Unsuccessful S3 put_object response. Status - {status}")

def train_test_split_script(labeled_features):
    threshold_dates = [[pd.to_datetime('2015-07-31 01:00:00'), pd.to_datetime('2015-08-01 01:00:00')],
                   [pd.to_datetime('2015-08-31 01:00:00'), pd.to_datetime('2015-09-01 01:00:00')],
                   [pd.to_datetime('2015-09-30 01:00:00'), pd.to_datetime('2015-10-01 01:00:00')]]
    
    for last_train_date, first_test_date in threshold_dates:
        # split out training and test data
        print(labeled_features['datetime'][0])
        train_y = labeled_features.loc[labeled_features['datetime'] < last_train_date, 'failure']
        train_data = pd.get_dummies(labeled_features.loc[labeled_features['datetime'] < last_train_date].drop(['datetime',
                                                                                                            'machineID',
                                                                                                              'failure'], axis = 1))
        test_y = labeled_features.loc[labeled_features['datetime'] > last_train_date, 'failure']
        test_data = pd.get_dummies(labeled_features.loc[labeled_features['datetime'] > first_test_date].drop(['datetime',
                                                                                                           'machineID',
                                                                                                             'failure'], axis = 1))
    
    train_data['failure'] = train_y
    test_data['failure'] = test_y
    
    upload_file_s3(train_data, "train")
    upload_file_s3(test_data, "test")

    pd.DataFrame(train_data).to_csv(f"{base_dir}/train/train.csv", index = False)
    pd.DataFrame(test_data).to_csv(f"{base_dir}/test/test.csv", index = False)
    
if __name__ == "__main__":
    final_data_uri = f"s3://{bucket}/{prefix}/data/preprocessed/preprocessed.csv"
    final_data = wr.s3.read_csv(final_data_uri)
    final_data['datetime'] = pd.to_datetime(final_data['datetime'], format="%Y-%m-%d %H:%M:%S")
    train_test_split_script(final_data)

Overwriting scripts/train_test_split_data.py


### Create an instance of a FrameworkProcessor

In [12]:
from sagemaker.processing import FrameworkProcessor
from sagemaker.sklearn import SKLearn

sklearn_processor = FrameworkProcessor(
    estimator_cls = SKLearn,
    framework_version = "1.2-1",
    instance_type = "ml.m5.xlarge",
    instance_count = processing_instance_count,
    base_job_name = "sklearn-pred-maint-process",
    role = sagemaker_role,
    sagemaker_session = pipeline_session,
)

### Creating Processing Step

In [13]:
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep

processor_args = sklearn_processor.run(
    code = "preprocessing.py",
    source_dir = "scripts"
)

preprocess_step = ProcessingStep(name = "PdM-Data-Read-And-PreProcessing", step_args = processor_args)



In [14]:
train_test_args = sklearn_processor.run(
    outputs = [
        ProcessingOutput(output_name = "train", source = "/opt/ml/processing/train"),
        ProcessingOutput(output_name = "test", source = "/opt/ml/processing/test"),
    ],
    code = "train_test_split_data.py",
    source_dir = "scripts"
)

train_test_split_step = ProcessingStep(name = "PdM-Train-Test-Data-Split", step_args = train_test_args, depends_on = [preprocess_step.name])

In [15]:
fs_data = sklearn_processor.run(
    code = "featurestore.py",
    source_dir = "scripts"
)
feature_store_step = ProcessingStep(name = "PdM-FeatureStore-Creation", step_args = fs_data, depends_on = [preprocess_step.name])

### Define a Training Step to Train a Model

In [16]:
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep
from sagemaker.sklearn.model import SKLearnModel

In [17]:
# SKLearn estimator is used for end-to-end training and deployment
sklearn_estimator = SKLearn(
    entry_point = "scripts/rf_script-no-featurenames.py",
    role = sagemaker_role,
    instance_count = 1,
    instance_type = "ml.m5.xlarge",
    framework_version = "1.2-1",
    base_job_name = "rf-scikit",
    hyperparameters = {
        "n-estimators": 100,
        "min-samples-leaf": 3,
    },
    sagemaker_session = pipeline_session,
)

In [18]:
train_args = sklearn_estimator.fit(
    inputs = {
        "train": TrainingInput(
            s3_data = train_test_split_step.properties.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri,
            content_type = "text/csv"
        ),
        "test": TrainingInput(
            s3_data = train_test_split_step.properties.ProcessingOutputConfig.Outputs["test"].S3Output.S3Uri,
            content_type = "text/csv",
        )
    }
)

In [19]:
training_step = TrainingStep(
    name = "PdM-ModelTraininig",
    step_args = train_args,
    depends_on = [train_test_split_step.name]
)

### Define a Model Step to Create a Model

In [91]:
# from sagemaker.model import Model
# from sagemaker.workflow.model_step import ModelStep

# model = SKLearnModel(
#     model_data = training_step.properties.ModelArtifacts.S3ModelArtifacts,
#     role = sagemaker_role,
#     entry_point = "scripts/rf_script.py",
#     framework_version = "1.2-1",
#     name = "Predictive-Maintenance-Model-DataRest",
#     sagemaker_session = pipeline_session,
# )

# # model = Model(
# # #     image_uri = sagemaker.image_uris.retrieve('sklearn', region = region, version = "1.2-1",),
# #     image_uri = training_step.properties.AlgorithmSpecification.TrainingImage,
# #     model_data = training_step.properties.ModelArtifacts.S3ModelArtifacts,
# #     sagemaker_session = pipeline_session,
# #     name = "Predictive-Maintenance-Model-DataRest",
# #     role = sagemaker_role,
# # )

# create_model_step = ModelStep(
#     name = "PdM-PreDeployment",    
#     step_args = model.create(instance_type="ml.m5.xlarge")
# )

In [34]:
# from sagemaker.sklearn.model import SKLearnModel
# from sagemaker.workflow.model_step import ModelStep

# # Using the SKLearn Model to create model step
# # SKLearnModel is used for deployment-only workflows, where the model has already been trained
# model = SKLearnModel(
#     entry_point = "scripts/rf_script.py",
#     model_data = training_step.properties.ModelArtifacts.S3ModelArtifacts,
#     sagemaker_session = pipeline_session,
#     framework_version = "1.2-1",
#     role = sagemaker_role,
# )

# step_create_model = ModelStep(
#     name = "PdM-Create-Model",
#     step_args = model.create(instance_type = "ml.m5.large"),
#     depends_on = [training_step.name]
# )

### Define a Transform Step to Perform Batch Transformation

In [56]:
# Throwing AttributeError: 'NoneType' object has no attribute 'startswith'
## ----------------------------------
from sagemaker.transformer import Transformer
from sagemaker.inputs import TransformInput
from sagemaker.workflow.steps import TransformStep

transformer = Transformer(
    model_name = "pipelines-s3w0zfsb10v5-PdM-Model-Creation-C-3ONBZoPQPt",
    instance_type = "ml.m5.xlarge",
    instance_count = 1,
    output_path = f"s3://{bucket}/{prefix}/PdM-Batch-Transform",
    accept = 'text/csv',
    strategy = 'MultiRecord',
    base_transform_job_name = "PdM-Batch-Transform-Job",
    env = {
        'SAGEMAKER_DEFAULT_INVOCATIONS_ACCEPT': 'text/csv',
        'SAGEMAKER_DEFAULT_INVOCATIONS_CONTENT_TYPE': 'text/csv'
    }
)

transform_step = TransformStep(
    name = "PdM-Batch-Transform", transformer = transformer, inputs = TransformInput(data = batch_data)
)
## ----------------------------------

transformer.transform(
    data = batch_data_uri,
    content_type = 'text/csv',
)

INFO:sagemaker:Creating transform job with name: PdM-Batch-Transform-Job-2023-05-02-06-14-51-647


..........................[34m2023-05-02 06:19:12,888 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[34m2023-05-02 06:19:12,890 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[34m2023-05-02 06:19:12,891 INFO - sagemaker-containers - nginx config: [0m
[34mworker_processes auto;[0m
[34mdaemon off;[0m
[34mpid /tmp/nginx.pid;[0m
[34merror_log  /dev/stderr;[0m
[34mworker_rlimit_nofile 4096;[0m
[34mevents {
  worker_connections 2048;[0m
[34m}[0m
[34mhttp {
  include /etc/nginx/mime.types;
  default_type application/octet-stream;
  access_log /dev/stdout combined;
  upstream gunicorn {
    server unix:/tmp/gunicorn.sock;
  }
  server {
    listen 8080 deferred;
    client_max_body_size 0;
    keepalive_timeout 3;
    location ~ ^/(ping|invocations|execution-parameters) {
      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
      proxy_set_header Host $http_host;
      proxy_redirect off;
    

KeyboardInterrupt: 

### Run Bias Metrics with Clarify

In [59]:
import pathlib

# Initializes a configuration of both input and output datasets.
bias_data_config = sagemaker.clarify.DataConfig(
    s3_data_input_path = train_test_split_step.properties.ProcessingOutputConfig.Outputs[
        "train"
    ].S3Output.S3Uri,
    s3_output_path = f"s3://{bucket}/{prefix}/clarify-output/pipeline/bias",
    label = "failure",
    dataset_type = "text/csv",
)

# Initializes a configuration of the sensitive groups in the dataset.
bias_config = sagemaker.clarify.BiasConfig(
    label_values_or_threshold = [0],
    facet_name = "age",
    facet_values_or_threshold = [1],
)

analysis_config = bias_data_config.get_config()
analysis_config.update(bias_config.get_config())
analysis_config["methods"] = {"pre_training_bias": {"methods": "all"}}

clarify_config_dir = pathlib.Path("config")
clarify_config_dir.mkdir(exist_ok = True)
with open(clarify_config_dir / "analysis_config.json", "w") as f:
    json.dump(analysis_config, f)

s3_client.upload_file(
    Filename = "config/analysis_config.json",
    Bucket = bucket,
    Key = f"{prefix}/clarify-config/analysis_config.json",
)

In [62]:
clarify_processor = sagemaker.processing.Processor(
    base_job_name = "PdM-Clarify-Processor",
    image_uri = sagemaker.clarify.image_uris.retrieve(framework = "clarify", region = region, version = "1.2-1"),
    role = sagemaker_role,
    instance_count = 1,
    instance_type = "ml.c5.xlarge",
)

clarify_step = ProcessingStep(
    name = "PdM-ClarifyProcessor",
    processor = clarify_processor,
    inputs = [
        sagemaker.processing.ProcessingInput(
            input_name = "analysis_config",
            source = f"s3://{bucket}/{prefix}/clarify-config/analysis_config.json",
            destination = "/opt/ml/processing/input/config",
        ),
        sagemaker.processing.ProcessingInput(
            input_name = "dataset",
            source = train_test_split_step.properties.ProcessingOutputConfig.Outputs[
                "train"
            ].S3Output.S3Uri,
            destination = "/opt/ml/processing/input/data",
        ),
    ],
    outputs = [
        sagemaker.processing.ProcessingOutput(
            source = "/opt/ml/processing/output/analysis.json",
            destination = f"s3://{bucket}/{prefix}/clarify-output/pipeline/bias",
            output_name = "analysis_result",
        )
    ],
)

INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


### Define a Register Model Step to Create a Model Package

In [None]:
mpg_name = prefix

model_metrics = demo_helpers.ModelMetrics(
    bias=sagemaker.model_metrics.MetricsSource(
        s3_uri=clarify_step.properties.ProcessingOutputConfig.Outputs[
            "analysis_result"
        ].S3Output.S3Uri,
        content_type="application/json",
    )
)

In [None]:
register_args = model.register(
    content_types = ["text/csv"],
    response_types = ["text/csv"],
    inference_instances = ["ml.t2.medium", "ml.m5.xlarge"],
    transform_instances = ["ml.m5.xlarge"],
    model_package_group_name = "PdM-ModelPackageGroupName",
    approval_status = model_approval_status,
)
step_register = ModelStep(name = "PdM-RegisterModel", step_args = register_args)

In [20]:
from sagemaker.workflow.step_collections import RegisterModel

register_step = RegisterModel(
    name = "PdM-Register-Model",
    estimator = sklearn_estimator,
    model_data = training_step.properties.ModelArtifacts.S3ModelArtifacts,
    content_types = ["text/csv"],
    response_types = ["text/csv"],
    inference_instances = ["ml.t2.medium", "ml.m5.xlarge"],
    transform_instances = ["ml.m5.xlarge"],
    model_package_group_name = "predictive-maintenance",
    approval_status = model_approval_status,
)

In [179]:
deploy_args = sklearn_processor.run(
    code = "deploy_model.py",
    source_dir = "scripts",
    arguments = [
        "--model-data",
        training_step.properties.ModelArtifacts.S3ModelArtifacts,
        "--region",
        region,
        "--endpoint-instance-type",
        "ml.m5.xlarge",
        "--endpoint-name",
        "PdM-SKLearn-Pipeline-Endpoint-ReTraining",
    ],
)

deploy_step = ProcessingStep(
    name = "PdM-DeployModel",
    step_args = deploy_args
)



### Define a Pipeline of Parameters, Steps, and Conditions

In [180]:
from sagemaker.workflow.pipeline import Pipeline

pipeline_name = "Predictive-Maintenance-Pipeline"
pipeline = Pipeline(
    name = pipeline_name,
    parameters = [
        processing_instance_count,
        instance_type,
        model_approval_status,
    ],
    steps = [preprocess_step, 
             feature_store_step, 
             train_test_split_step, 
             training_step,
             register_step, 
             deploy_step],
)

In [181]:
definition = json.loads(pipeline.definition())
definition

INFO:sagemaker.processing:Uploaded scripts to s3://sagemaker-us-east-1-451633145432/Predictive-Maintenance-Pipeline/code/7f429cfb8f7771e6c5a31bd861d3bb2b/sourcedir.tar.gz
INFO:sagemaker.processing:Uploaded scripts to s3://sagemaker-us-east-1-451633145432/Predictive-Maintenance-Pipeline/code/7f429cfb8f7771e6c5a31bd861d3bb2b/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://sagemaker-us-east-1-451633145432/Predictive-Maintenance-Pipeline/code/54f0ef6bee583ff9186b762aaf572190/runproc.sh
INFO:sagemaker.processing:runproc.sh uploaded to s3://sagemaker-us-east-1-451633145432/Predictive-Maintenance-Pipeline/code/54f0ef6bee583ff9186b762aaf572190/runproc.sh
INFO:sagemaker.processing:Uploaded scripts to s3://sagemaker-us-east-1-451633145432/Predictive-Maintenance-Pipeline/code/5975677b33077216604c8b482c658a9d/sourcedir.tar.gz
INFO:sagemaker.processing:Uploaded scripts to s3://sagemaker-us-east-1-451633145432/Predictive-Maintenance-Pipeline/code/5975677b33077216604c8b482c658a

{'Version': '2020-12-01',
 'Metadata': {},
 'Parameters': [{'Name': 'ProcessingInstanceCount',
   'Type': 'Integer',
   'DefaultValue': 1},
  {'Name': 'TrainingInstanceType',
   'Type': 'String',
   'DefaultValue': 'ml.m5.xlarge'},
  {'Name': 'ModelApprovalStatus',
   'Type': 'String',
   'DefaultValue': 'PendingManualApproval'}],
 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},
  'TrialName': {'Get': 'Execution.PipelineExecutionId'}},
 'Steps': [{'Name': 'PdM-Data-Read-And-PreProcessing',
   'Type': 'Processing',
   'Arguments': {'ProcessingResources': {'ClusterConfig': {'InstanceType': 'ml.m5.xlarge',
      'InstanceCount': {'Get': 'Parameters.ProcessingInstanceCount'},
      'VolumeSizeInGB': 30}},
    'AppSpecification': {'ImageUri': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:1.2-1-cpu-py3',
     'ContainerEntrypoint': ['/bin/bash',
      '/opt/ml/processing/input/entrypoint/runproc.sh']},
    'RoleArn': 'arn:aws:iam::4516

{'Version': '2020-12-01',
 'Metadata': {},
 'Parameters': [{'Name': 'ProcessingInstanceCount',
   'Type': 'Integer',
   'DefaultValue': 1},
  {'Name': 'TrainingInstanceType',
   'Type': 'String',
   'DefaultValue': 'ml.m5.xlarge'},
  {'Name': 'ModelApprovalStatus',
   'Type': 'String',
   'DefaultValue': 'PendingManualApproval'}],
 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},
  'TrialName': {'Get': 'Execution.PipelineExecutionId'}},
 'Steps': [{'Name': 'PdM-Data-Read-And-PreProcessing',
   'Type': 'Processing',
   'Arguments': {'ProcessingResources': {'ClusterConfig': {'InstanceType': 'ml.m5.xlarge',
      'InstanceCount': {'Get': 'Parameters.ProcessingInstanceCount'},
      'VolumeSizeInGB': 30}},
    'AppSpecification': {'ImageUri': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:1.2-1-cpu-py3',
     'ContainerEntrypoint': ['/bin/bash',
      '/opt/ml/processing/input/entrypoint/runproc.sh']},
    'RoleArn': 'arn:aws:iam::4516

In [182]:
pipeline.upsert(role_arn = sagemaker_role)

INFO:sagemaker.processing:Uploaded scripts to s3://sagemaker-us-east-1-451633145432/Predictive-Maintenance-Pipeline/code/7f429cfb8f7771e6c5a31bd861d3bb2b/sourcedir.tar.gz
INFO:sagemaker.processing:Uploaded scripts to s3://sagemaker-us-east-1-451633145432/Predictive-Maintenance-Pipeline/code/7f429cfb8f7771e6c5a31bd861d3bb2b/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://sagemaker-us-east-1-451633145432/Predictive-Maintenance-Pipeline/code/54f0ef6bee583ff9186b762aaf572190/runproc.sh
INFO:sagemaker.processing:runproc.sh uploaded to s3://sagemaker-us-east-1-451633145432/Predictive-Maintenance-Pipeline/code/54f0ef6bee583ff9186b762aaf572190/runproc.sh
INFO:sagemaker.processing:Uploaded scripts to s3://sagemaker-us-east-1-451633145432/Predictive-Maintenance-Pipeline/code/5975677b33077216604c8b482c658a9d/sourcedir.tar.gz
INFO:sagemaker.processing:Uploaded scripts to s3://sagemaker-us-east-1-451633145432/Predictive-Maintenance-Pipeline/code/5975677b33077216604c8b482c658a

{'PipelineArn': 'arn:aws:sagemaker:us-east-1:451633145432:pipeline/predictive-maintenance-pipeline',
 'ResponseMetadata': {'RequestId': 'f581c3e3-0e3a-4861-b08c-1264bbf0490e',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'f581c3e3-0e3a-4861-b08c-1264bbf0490e',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '99',
   'date': 'Wed, 10 May 2023 04:18:10 GMT'},
  'RetryAttempts': 0}}

{'PipelineArn': 'arn:aws:sagemaker:us-east-1:451633145432:pipeline/predictive-maintenance-pipeline',
 'ResponseMetadata': {'RequestId': 'f581c3e3-0e3a-4861-b08c-1264bbf0490e',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'f581c3e3-0e3a-4861-b08c-1264bbf0490e',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '99',
   'date': 'Wed, 10 May 2023 04:18:10 GMT'},
  'RetryAttempts': 0}}

In [183]:
execution = pipeline.start()

In [38]:
execution.describe()

{'PipelineArn': 'arn:aws:sagemaker:us-east-1:451633145432:pipeline/predictive-maintenance-pipeline',
 'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-1:451633145432:pipeline/predictive-maintenance-pipeline/execution/4wmxtlb5l2o8',
 'PipelineExecutionDisplayName': 'execution-1682575101943',
 'PipelineExecutionStatus': 'Executing',
 'PipelineExperimentConfig': {'ExperimentName': 'predictive-maintenance-pipeline',
  'TrialName': '4wmxtlb5l2o8'},
 'CreationTime': datetime.datetime(2023, 4, 27, 5, 58, 21, 876000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2023, 4, 27, 6, 57, 20, 878000, tzinfo=tzlocal()),
 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:451633145432:user-profile/d-l7econawrxww/aiml-sandbox',
  'UserProfileName': 'aiml-sandbox',
  'DomainId': 'd-l7econawrxww'},
 'LastModifiedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:451633145432:user-profile/d-l7econawrxww/aiml-sandbox',
  'UserProfileName': 'aiml-sandbox',
  'DomainId': 'd-l7econawr

In [184]:
execution.list_steps()

[{'StepName': 'PdM-Data-Read-And-PreProcessing',
  'StartTime': datetime.datetime(2023, 5, 10, 4, 18, 13, 445000, tzinfo=tzlocal()),
  'StepStatus': 'Executing',
  'AttemptCount': 0,
  'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:451633145432:processing-job/pipelines-54hc9qp1c194-PdM-Data-Read-And-Pr-bImW1R0XGT'}}}]

[{'StepName': 'PdM-Data-Read-And-PreProcessing',
  'StartTime': datetime.datetime(2023, 5, 10, 4, 18, 13, 445000, tzinfo=tzlocal()),
  'StepStatus': 'Executing',
  'AttemptCount': 0,
  'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:451633145432:processing-job/pipelines-54hc9qp1c194-PdM-Data-Read-And-Pr-bImW1R0XGT'}}}]

In [60]:
import time
from sagemaker.lineage.visualizer import LineageTableVisualizer


viz = LineageTableVisualizer(sagemaker.session.Session())
for execution_step in reversed(execution.list_steps()):
    print(execution_step)
    display(viz.show(pipeline_execution_step=execution_step))
    time.sleep(5)

{'StepName': 'PredictiveMaintenanceProcess', 'StartTime': datetime.datetime(2023, 4, 23, 17, 17, 26, 95000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2023, 4, 23, 17, 21, 36, 865000, tzinfo=tzlocal()), 'StepStatus': 'Failed', 'AttemptCount': 0, 'FailureReason': 'ClientError: AlgorithmError: See job logs for more information', 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:451633145432:processing-job/pipelines-zz185sc95bbe-predictivemaintenanc-r6sgbfjtd8'}}}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...7d1d7483ff10e4a22a58f19/preprocessing.py,Input,DataSet,ContributedTo,artifact
1,s3://...ve-maintenance/data/train-test/train.csv,Input,DataSet,ContributedTo,artifact
2,68331...com/sagemaker-scikit-learn:1.2-1-cpu-py3,Input,Image,ContributedTo,artifact
3,s3://...PredictiveMaintenanceProcess/output/test,Output,DataSet,Produced,artifact
4,s3://...redictiveMaintenanceProcess/output/train,Output,DataSet,Produced,artifact


### List and Check the Endpoint

In [185]:
endpoint_info = sagemaker_boto_client.describe_endpoint(EndpointName = "PdM-SKLearn-Pipeline-Endpoint-ReTraining")
endpoint_info["EndpointStatus"]

'InService'

'InService'

In [33]:
sagemaker_boto_client.list_endpoints(NameContains = "PdM-SKLearn-Pipeline-Endpoint-ReTraining")[
    "Endpoints"
]

[{'EndpointName': 'PdM-SKLearn-Pipeline-Endpoint-ReTraining',
  'EndpointArn': 'arn:aws:sagemaker:us-east-1:451633145432:endpoint/pdm-sklearn-pipeline-endpoint-retraining',
  'CreationTime': datetime.datetime(2023, 5, 9, 8, 4, 32, 950000, tzinfo=tzlocal()),
  'LastModifiedTime': datetime.datetime(2023, 5, 9, 8, 6, 42, 253000, tzinfo=tzlocal()),
  'EndpointStatus': 'InService'}]

### Test the Endpoint with json data

In [None]:
sagemaker_runtime = boto3.client('sagemaker-runtime')

inference_data = [[175.0811885,367.0124825,85.56419146,38.93223666,9.619609293,14.06342007,8.532316108,6.92274997,173.6695574,410.6964281,101.2469478,39.05261843,16.14284195,46.31450847,10.57037821,5.183643471,0,1,2,0,0,15,0,135,0,18,0,0,1,0]]

# specify the endpoint name and content type
endpoint_name = "PdM-SKLearn-Pipeline-Endpoint-ReTraining"
content_type = 'application/json'

# convert the input data to JSON format
json_data = json.dumps(inference_data)
print(type(json_data))

# make a request to the SageMaker endpoint
response = sagemaker_runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType=content_type,
    Body=inference_data,
#     Accept="application/json"
)

print(f"Response object: {response}")
print(type(response))

# parse the response
result = json.loads(response['Body'].read().decode())

# print the inference result
print(result)


### Test the Endpoint with csv data

In [188]:
inference_data = "175.0811885,367.0124825,85.56419146,38.93223666,9.619609293,14.06342007,8.532316108,6.92274997,173.6695574,410.6964281,101.2469478,39.05261843,16.14284195,46.31450847,10.57037821,5.183643471,0,1,2,0,0,15,0,135,0,18,0,0,1,0"

# specify the endpoint name and content type
endpoint_name = "PdM-SKLearn-Pipeline-Endpoint-ReTraining"
content_type = 'text/csv'

# make a request to the SageMaker endpoint
response = sagemaker_runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType=content_type,
    Body=inference_data,
)

print(f"Response object: {response}")
print(type(response))

# # parse the response
# result = json.loads(response['Body'].read().decode())

# # print the inference result
# print(result)


ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received server error (500) from primary with message "<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<title>500 Internal Server Error</title>
<h1>Internal Server Error</h1>
<p>The server encountered an internal error and was unable to complete your request. Either the server is overloaded or there is an error in the application.</p>
". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/PdM-SKLearn-Pipeline-Endpoint-ReTraining in account 451633145432 for more information.

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received server error (500) from primary with message "<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<title>500 Internal Server Error</title>
<h1>Internal Server Error</h1>
<p>The server encountered an internal error and was unable to complete your request. Either the server is overloaded or there is an error in the application.</p>
". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/PdM-SKLearn-Pipeline-Endpoint-ReTraining in account 451633145432 for more information.

### Model Monitoring - Data Drift

In [193]:
from sagemaker.model_monitor import DefaultModelMonitor
from sagemaker.model_monitor.dataset_format import DatasetFormat

pdm_data_drift_monitor = DefaultModelMonitor(
    role = sagemaker_role,
    instance_count = 1,
    instance_type = "ml.m5.xlarge",
    volume_size_in_gb = 1,
    max_runtime_in_seconds = 360,
    sagemaker_session = sagemaker_session,
)

INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: .
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [194]:
from datetime import datetime

baseline_data = "datasets/train-test/train.csv"
baseline_results_uri = f"s3://{bucket}/{prefix}/data/baselining/results2"
baseline_job_name = f"PdM-Baseline-Job-Data-Monitor-{datetime.utcnow():%Y-%m-%d-%H%M}"

pdm_data_drift_monitor.suggest_baseline(
    job_name = baseline_job_name,
    baseline_dataset = baseline_data,
    dataset_format = DatasetFormat.csv(header = True),
    output_s3_uri = baseline_results_uri,
)

INFO:sagemaker:Creating processing-job with name PdM-Baseline-Job-Data-Monitor-2023-05-10-0518


..........................[34m2023-05-10 05:23:09,985 - matplotlib.font_manager - INFO - Generating new fontManager, this may take some time...[0m
[34m2023-05-10 05:23:10.532842: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory[0m
[34m2023-05-10 05:23:10.532868: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.[0m
[34m2023-05-10 05:23:12.135439: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory[0m
[34m2023-05-10 05:23:12.135468: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)[0m
[34m2023-05-10 05:23:12.135488: I tensorflow/stream_executor/cuda/cuda_diagnostic

<sagemaker.processing.ProcessingJob at 0x7f2272dd63d0>

### Preprocessing Script for monitoring schedule data

In [233]:
%%writefile scripts/datacapture_preprocessing.py

import json

def preprocess_handler(inference_record):
    input_data = json.loads(inference_record.endpoint_input.data)
    input_data = {f"feature{str(i).zfill(10)}": val for i, val in enumerate(input_data)}

    output_data = json.loads(inference_record.endpoint_output.data)
#     output_data = json.loads(inference_record.endpoint_output.data)["predictions"][0][0]
    output_data = {"prediction0": output_data}

    print(input_data)
    print(type(input_data))
    print(output_data)
    return {**input_data}

Overwriting scripts/datacapture_preprocessing.py


In [234]:
s3_client.upload_file(
    Filename="scripts/datacapture_preprocessing.py", Bucket=bucket, Key=f"{prefix}/code/datacapture_preprocessing.py"
)

### Create Monitoring Schedule

In [217]:
preprocessor_path = f"s3://{bucket}/{prefix}/code/datacapture_preprocessing.py"

In [218]:
from sagemaker.model_monitor import CronExpressionGenerator

monitor_schedule_name = "PdM-DataDrift-Monitoring-Schedule-2"

pdm_data_drift_monitor.create_monitoring_schedule(
    monitor_schedule_name = monitor_schedule_name,
    statistics = pdm_data_drift_monitor.baseline_statistics(),
    record_preprocessor_script = preprocessor_path,
    endpoint_input = endpoint_name,
    constraints = pdm_data_drift_monitor.suggested_constraints(),
    schedule_cron_expression = CronExpressionGenerator.hourly(),
    output_s3_uri = baseline_results_uri,
    enable_cloudwatch_metrics = True
)

INFO:sagemaker.model_monitor.model_monitoring:Creating Monitoring Schedule with name: PdM-DataDrift-Monitoring-Schedule-2


In [127]:
pdm_data_drift_monitor.monitoring_schedule_name = monitor_schedule_name
print(pdm_data_drift_monitor.monitoring_schedule_name)

PdM-DataDrift-Monitoring-Schedule-2


In [223]:
len(pdm_data_drift_monitor.list_executions())

5

In [225]:
pdm_data_drift_monitor.list_executions()[-1].describe()

{'ProcessingInputs': [{'InputName': 'baseline',
   'AppManaged': False,
   'S3Input': {'S3Uri': 's3://ideaaiml-demo/mlops/predictive-maintenance/data/baselining/results2/statistics.json',
    'LocalPath': '/opt/ml/processing/baseline/stats',
    'S3DataType': 'S3Prefix',
    'S3InputMode': 'File',
    'S3DataDistributionType': 'FullyReplicated'}},
  {'InputName': 'constraints',
   'AppManaged': False,
   'S3Input': {'S3Uri': 's3://ideaaiml-demo/mlops/predictive-maintenance/data/baselining/results2/constraints.json',
    'LocalPath': '/opt/ml/processing/baseline/constraints',
    'S3DataType': 'S3Prefix',
    'S3InputMode': 'File',
    'S3DataDistributionType': 'FullyReplicated'}},
  {'InputName': 'pre_processor_script',
   'AppManaged': False,
   'S3Input': {'S3Uri': 's3://ideaaiml-demo/mlops/predictive-maintenance/code/datacapture_preprocessing.py',
    'LocalPath': '/opt/ml/processing/code/preprocessing',
    'S3DataType': 'S3Prefix',
    'S3InputMode': 'File',
    'S3DataDistributio

In [216]:
pdm_data_drift_monitor.delete_monitoring_schedule()


Deleting Monitoring Schedule with name: PdM-DataDrift-Monitoring-Schedule-2


INFO:sagemaker.model_monitor.model_monitoring:Deleting Data Quality Job Definition with name: data-quality-job-definition-2023-05-10-05-32-45-915


### Preprocessing for Data Capturing

In [196]:
import json

inference_data = [[175.0811885,367.0124825,85.56419146,38.93223666,9.619609293,14.06342007,8.532316108,6.92274997,173.6695574,410.6964281,101.2469478,39.05261843,16.14284195,46.31450847,10.57037821,5.183643471,0,1,2,0,0,15,0,135,0,18,0,0,1,0]]
json_data = json.dumps(inference_data)
endpoint_input = endpoint_name

# def preprocess_handler(inference_record):
#     input_data = json.loads(inference_record.endpoint_input.data)
input_data = json.loads(json_data)
input_data = {f"feature{str(i).zfill(10)}": val for i, val in enumerate(input_data)}

#     output_data = json.loads(inference_record.endpoint_output.data)["predictions"][0][0]
#     output_data = {"prediction0": output_data}

print(input_data)
print(type(input_data))
#     print(output_data)

{'feature0000000000': [175.0811885, 367.0124825, 85.56419146, 38.93223666, 9.619609293, 14.06342007, 8.532316108, 6.92274997, 173.6695574, 410.6964281, 101.2469478, 39.05261843, 16.14284195, 46.31450847, 10.57037821, 5.183643471, 0, 1, 2, 0, 0, 15, 0, 135, 0, 18, 0, 0, 1, 0]}
<class 'dict'>
