# ModelOps
The following steps incorporates saving the model within the S3 Bucket to have a model registry as an option. 

In [54]:
%%writefile truck_break_off_model.py

import numpy as np
import pandas as pd
import os
import random
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score
import joblib
import argparse

class TruckBreakOffModel:

    def model_fn(self, model_dir):
        clf = joblib.load(os.path.join(model_dir, "model.joblib"))
        return clf


    def ml_model(self):  
        n_estimators = 100
        random_state = 0  

        # Create a random forest classifier
        clf = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)

        # Generate random data
        X = np.random.rand(100, 4)  # Example data, replace with actual data
        y = np.random.randint(2, size=100)  # Example labels, replace with actual labels

        # Train the classifier
        clf.fit(X, y)

        # Evaluate the model:
        y_pred = clf.predict(X)
        accuracy = accuracy_score(y, y_pred)
        print("Accuracy:", accuracy)
        # classification report
        report = classification_report(y, y_pred)
        print("Classification Report:\n", report)
        # confusion matrix
        confusion = confusion_matrix(y, y_pred)
        print("Confusion Matrix:\n", confusion)
        # precision
        precision = precision_score(y, y_pred)
        print("Precision:", precision)
        # recall
        recall = recall_score(y, y_pred)
        print("Recall:", recall)
        # f1 score
        f1 = f1_score(y, y_pred) 
        print("F1 Score:", f1)

        
        # Save the trained model
        joblib.dump(clf, "model.joblib")
        return clf  

    

        

if __name__ == "__main__":
    print("[INFO] Extracting arguments...")
    truck_break_off_mdl = TruckBreakOffModel()

    parser = argparse.ArgumentParser()

    parser.add_argument("--n_estimators", type=int, default=100)
    parser.add_argument("--random_state", type=int, default=0)

    # Data, model, and output directories
    parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR"))
    parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAINING"))
    parser.add_argument("--test", type=str, default=os.environ.get("SM_CHANNEL_TESTING"))
    parser.add_argument("--train-file", type=str, default="train-V1.csv")
    parser.add_argument("--test-file", type=str, default="test-V1.csv")

    args, _ = parser.parse_known_args()

    print("[INFO] Reading data...")
    print()
    train_df = pd.read_csv(os.path.join(args.train, args.train_file))
    test_df = pd.read_csv(os.path.join(args.test, args.test_file))

    print("Train Dataset:\n", train_df.head())
    print()
    print("Test Dataset:\n", test_df.head())
    print()

    print("[INFO] Building Training & Testing Datasets...")
    print()
    features = ['ROUTEID', 'LAST_EDITED_DATE','FROMDATE', 'TODATE', 'FROMMEASURE', 'TOMEASURE', 'TRUCK_BREAK_OFF']
    label = 'LABEL'

    print("[INFO] Training Model...")
    print()

    model = truck_break_off_mdl.ml_model()  
    model_path = os.path.join(args.model_dir, "model.joblib")
    joblib.dump(model, model_path)  
    print("Model saved at: {}".format(model_path))
    print()


Overwriting truck_break_off_model.py


### Sagemaker Training of Model
Must get sagemaker role from IAM. In this particular instance we took an existing role for sagemaker (execution role) to enable this functionality. This role is usually provided by AWS or you can create one specific for you. Different roles and images can have CPU and GPU and there is a cost associated to these depending on time. 

In [55]:
from sagemaker.sklearn.estimator import SKLearn

FRAMEWORK_VERSION = "0.23-1"

# Create a SKLearn estimator
sklearn_estimator = SKLearn(
    entry_point="truck_break_off_model.py",
    # role="arn:aws:iam::174023208515:role/service-role/AmazonSageMaker-ExecutionRole-20240321T161177", # Replace with AWS role
    role= "arn:aws:iam::174023208515:role/sagemaker-truck-break-off-role",
    instance_count=1,
    instance_type="ml.m5.xlarge",
    base_job_name="truck-breakoff",
    framework_version=FRAMEWORK_VERSION,
    hyperparameters={
        "n_estimators": 100,
        "random_state": 0,
    },
    use_spot_instances=True,
    max_run=3600,
    max_wait=7200,
    output_path= "s3://martymdlregistry/sagemaker/truck-break-off/models/classic"
    
)


## Asynchronous call to launch training of model

In [56]:
# Launch training job with an async call
train_path = "s3://martymdlregistry/sagemaker/truck-break-off/datasets/train-V1.csv"
test_path = "s3://martymdlregistry/sagemaker/truck-break-off/datasets/test-V1.csv"

sklearn_estimator.fit({"training": train_path, "testing": test_path}, wait=True)



INFO:sagemaker:Creating training-job with name: truck-breakoff-2024-04-08-19-40-13-863


2024-04-08 19:40:15 Starting - Starting the training job...
2024-04-08 19:40:31 Starting - Preparing the instances for training...
2024-04-08 19:41:05 Downloading - Downloading input data...
2024-04-08 19:41:46 Training - Training image download completed. Training in progress..2024-04-08 19:41:53,896 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
2024-04-08 19:41:53,900 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2024-04-08 19:41:53,946 sagemaker_sklearn_container.training INFO     Invoking user training script.
2024-04-08 19:41:54,119 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2024-04-08 19:41:54,132 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2024-04-08 19:41:54,144 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2024-04-08 19:41:54,153 sagemaker-training-toolkit INFO     Invoking user script
T

## Show where the model is stored in the model registry

In [64]:
import sagemaker
import boto3

# creating a client for sagemaker
sm_boto3 = boto3.client('sagemaker')
# creating a session for sagemaker
sess = sagemaker.Session()
region = sess.boto_session.region_name

# Need to create an S3 bucket
bucket = 'martymdlregistry' # specific s3 bucket
print('Using bucket ' + bucket)

sklearn_estimator.latest_training_job.wait(logs="None")

artifact = sm_boto3.describe_training_job(
    TrainingJobName=sklearn_estimator.latest_training_job.name
)["ModelArtifacts"]["S3ModelArtifacts"]
print("Model artifact persisted at " + artifact)


Using bucket martymdlregistry

2024-04-08 19:42:11 Starting - Preparing the instances for training
2024-04-08 19:42:11 Downloading - Downloading the training image
2024-04-08 19:42:11 Training - Training image download completed. Training in progress.
2024-04-08 19:42:11 Uploading - Uploading generated training model
2024-04-08 19:42:11 Completed - Training job completed
Model artifact persisted at s3://martymdlregistry/sagemaker/truck-break-off/models/classic/truck-breakoff-2024-04-08-19-40-13-863/output/model.tar.gz


## Define Capbility for Deployment

We want to keep a copy so that we can deploy a specific model at an endpoint (App)

In [65]:
from sagemaker.sklearn.model import SKLearnModel
from time import gmtime, strftime # type: ignore
from sagemaker.predictor import RealTimePredictor

model_name = "truckBreakOffModel-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
model = SKLearnModel(
    name=model_name,
    model_data=artifact,
    role="arn:aws:iam::174023208515:role/sagemaker-truck-break-off-role",
    entry_point="truck_break_off_model.py",
    framework_version=FRAMEWORK_VERSION,
)

endpoint_name = "truckBreakOffModel-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print("EndpointName = {}".format(endpoint_name))

predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m4.xlarge",
    endpoint_name=endpoint_name,
)



EndpointName = truckBreakOffModel-2024-04-08-20-59-45


INFO:sagemaker:Creating model with name: truckBreakOffModel-2024-04-08-20-59-45
INFO:sagemaker:Creating endpoint-config with name truckBreakOffModel-2024-04-08-20-59-45
INFO:sagemaker:Creating endpoint with name truckBreakOffModel-2024-04-08-20-59-45


--------------*

UnexpectedStatusException: Error hosting endpoint truckBreakOffModel-2024-04-08-20-59-45: Failed. Reason: The primary container for production variant AllTraffic did not pass the ping health check. Please check CloudWatch logs for this endpoint.. Try changing the instance type or reference the troubleshooting page https://docs.aws.amazon.com/sagemaker/latest/dg/async-inference-troubleshooting.html

In [61]:
model

<sagemaker.sklearn.model.SKLearnModel at 0x304e6c280>

In [62]:
predictor

NameError: name 'predictor' is not defined

### Shows sagemaker predictor

In [None]:
predictor

In [None]:
endpoint_name

## Test and Predict