# ModelOps and Deep Learning Algo
The following steps incorporates saving the model within the S3 Bucket to have a model registry as an option. 

In [125]:
%%writefile truck_breakoff_rl_markov.py

import numpy as np
import tensorflow as tf
import pandas as pd
import os
import random
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score
import joblib
import pathlib
from io import StringIO
import argparse
import joblib

# Disable eager execution
tf.compat.v1.disable_eager_execution()

class TruckBreakOffModel:
    # saves model within s3 bucket
    def model_fn(self, model_dir):
        clf = joblib.load(os.path.join(model_dir, "model.joblib"))
        return clf


    def reinforcement_model(self, args):
        # Define markov chain
        transition_matrix = np.array([[0.9, 0.1],
                                      [0.3, 0.7]])

        # Define the reward matrix
        reward_matrix = np.array([[10, -1],
                                  [-1, 10]])

        # Define hyperparameters
        num_episodes = args.num_episodes
        learning_rate = args.learning_rate
        discount_factor = args.discount_factor
        epsilon = args.epsilon

        # Define the Q-network
        num_states = transition_matrix.shape[0]
        num_actions = transition_matrix.shape[1]
        W = tf.Variable(tf.random.uniform([num_states, num_actions], 0, 0.01))
        W = tf.transpose(W)

        # Define placeholders for state, action, and target Q-value
        state_ph = tf.compat.v1.placeholder(tf.int32, shape=[])
        action_ph = tf.compat.v1.placeholder(tf.int32, shape=[])
        target_q_value_ph = tf.compat.v1.placeholder(tf.float32, shape=[])

        # Compute Q-value of current state
        one_hot_state = tf.one_hot(state_ph, num_states)
        one_hot_state = tf.reshape(one_hot_state, [1, -1])  # Reshape to match the shape of W
        Q_values = tf.matmul(one_hot_state, tf.transpose(W))

        # Define loss
        updated_Q_values = tf.tensor_scatter_nd_update(Q_values, [[0, action_ph]], [target_q_value_ph])
        loss = tf.reduce_sum(tf.square(updated_Q_values - Q_values))

        # Define optimizer
        optimizer = tf.optimizers.SGD(learning_rate=learning_rate)

        # Define training operation
        train_op = optimizer.minimize(loss)

        # Start TensorFlow session
        with tf.compat.v1.Session() as sess:
            # Initialize variables
            sess.run(tf.compat.v1.global_variables_initializer())

            # Training loop
            for episode in range(num_episodes):
                state = np.random.randint(0, num_states)  # Start at a random state
                while True:
                    # Choose action (epsilon-greedy)
                    if np.random.rand() < epsilon:
                        action = np.random.randint(0, num_actions)
                    else:
                        action = sess.run(tf.argmax(Q_values, 1), feed_dict={state_ph: state})

                    # Perform action and observe next state
                    next_state = np.random.choice(range(num_states), p=transition_matrix[state])

                    # Compute reward
                    reward = reward_matrix[state, action]

                    # Compute target Q-value
                    max_Q_next = np.max(sess.run(Q_values, feed_dict={state_ph: next_state}))
                    target_Q_value = reward + discount_factor * max_Q_next

                    # Update Q-value
                    _ = sess.run(train_op, feed_dict={state_ph: state, action_ph: action, target_q_value_ph: target_Q_value})

                    state = next_state
                    if state == 0:  # Reached terminal state
                        break

            # Get learned Q-values
            learned_Q_values = sess.run(Q_values)
       

        # Print the learned Q-values
        print("Learned Q-values:")
        print(learned_Q_values)
        # After training is complete, assign the learned values to instance attributes
        self.learned_Q_values = learned_Q_values
        self.transition_matrix = transition_matrix
        self.reward_matrix = reward_matrix

        return learned_Q_values, transition_matrix, reward_matrix

    def evaluate_model(self, learned_Q_values, transition_matrix, reward_matrix):
        num_states = transition_matrix.shape[0]
        num_actions = transition_matrix.shape[1]
        
        # Initialize the cumulative return
        total_return = 0

        # Run episodes to compute the return
        num_episodes = 1000  # You can adjust this number
        for _ in range(num_episodes):
            state = np.random.randint(0, num_states)
            episode_return = 0
            while True:
                action = np.argmax(learned_Q_values[state])
                next_state = np.random.choice(range(num_states), p=transition_matrix[state])
                reward = reward_matrix[state, action]
                episode_return += reward
                state = next_state
                if state == 0:  # Reached terminal state
                    break
            total_return += episode_return

        average_return = total_return / num_episodes
        return average_return


if __name__ == "__main__":
    print("[INFO] Extracting arguments...")
    print()
    truck_break_off_mdl = TruckBreakOffModel()

    parser = argparse.ArgumentParser()


    # Hyperparameters sent by the client are passed as command-line arguments to the script.
    parser.add_argument("--num_episodes", type=int, default=1000)
    parser.add_argument("--learning_rate", type=float, default=0.1)
    parser.add_argument("--discount_factor", type=float, default=0.95)
    parser.add_argument("--epsilon", type=float, default=0.1)
    parser.add_argument("--num_states", type=int, default=2)
    parser.add_argument("--num_actions", type=int, default=2)
    parser.add_argument("--num_features", type=int, default=7)
 
    # Data, model, and output directories
    # sets the SageMaker environment within SageMaker
    parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR"))
    parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAINING"))
    parser.add_argument("--test", type=str, default=os.environ.get("SM_CHANNEL_TESTING"))

    # test/train files
    parser.add_argument("--train_file", type=str, default="train-V1.csv")
    parser.add_argument("--test_file", type=str, default="test-V1.csv")


    args, _ = parser.parse_known_args()

    print("[INFO] Reading data...")
    print()
    train_df = pd.read_csv(os.path.join(args.train, args.train_file))
    test_df = pd.read_csv(os.path.join(args.test, args.test_file))

    print("Train Dataset:\n", train_df.head())
    print()
    print("Test Dataset:\n", test_df.head())
    print()


    print("[INFO] Building Training & Testing Datasets...")
    print()
    features = ['ROUTEID', 'LAST_EDITED_DATE','FROMDATE', 'TODATE', 'FROMMEASURE', 'TOMEASURE', 'TRUCK_BREAK_OFF']
    label = 'LABEL'
    X_train = train_df[features]
    y_train = train_df[label]
    X_test = test_df[features]
    y_test = test_df[label]

    print("[INFO] Training Model...")
    print()

    # send to S3 bucket. SageMaker will take training data from the S3 bucket
    sk_prefix = "sagemaker/truck-break-off/datasets" # sagemaker environment
    model_dir = args.model_dir

    model_path = os.path.join(args.model_dir, "model.joblib")
    joblib.dump(truck_break_off_mdl.reinforcement_model, model_path)
    print("Model saved at: {}".format(model_path))
    print()

    print("[INFO] Model Training Complete...")

 


Overwriting truck_breakoff_rl_markov.py


### Sagemaker Training of Model
Must get sagemaker role from IAM. In this particular instance we took an existing role for sagemaker (execution role) to enable this functionality. This role is usually provided by AWS or you can create one specific for you. Different roles and images can have CPU and GPU and there is a cost associated to these depending on time. 

In [129]:
from sagemaker.tensorflow import TensorFlow

FRAMEWORK_VERSION = "2.7.0"

# Specify the image URI for TensorFlow
# image_uri = f"763104351884.dkr.ecr.us-west-1.amazonaws.com/tensorflow-training:{FRAMEWORK_VERSION}-cpu-py37-ubuntu18.04"
image_uri = f"763104351884.dkr.ecr.us-east-1.amazonaws.com/tensorflow-training:2.12.0-cpu-py310-ubuntu20.04-sagemaker"

# Create a TensorFlow estimator
tensorflow_estimator = TensorFlow(
    entry_point="truck_breakoff_rl_markov.py",
    # role="arn:aws:iam::174023208515:role/service-role/AmazonSageMaker-ExecutionRole-20240321T161040", # get from aws roles
    role= "arn:aws:iam::174023208515:role/sagemaker-truck-break-off-role",
    instance_count=1,
    instance_type="ml.m5.xlarge",
    image_uri=image_uri,
    base_job_name="truck-breakoff-rl-markov",
    hyperparameters={
        "num_episodes": 1000,
        "learning_rate": 0.1,
        "discount_factor": 0.95,
        "epsilon": 0.1,
        "num_states": 2,
        "num_actions": 2,
        "num_features": 7,
    },
    use_spot_instances=True,
    max_wait=7200,
    max_run=3600,
    output_path= "s3://martymdlregistry/sagemaker/truck-break-off/models/deep_learning",
)


## Asynchronous call to launch training of model

In [130]:
# Launch training job with an async call
train_path = "s3://martymdlregistry/sagemaker/truck-break-off/datasets/train-V1.csv"
test_path = "s3://martymdlregistry/sagemaker/truck-break-off/datasets/test-V1.csv"

tensorflow_estimator.fit({"training": train_path, "testing": test_path})



INFO:sagemaker:Creating training-job with name: truck-breakoff-rl-markov-2024-04-08-19-30-32-177


2024-04-08 19:30:33 Starting - Starting the training job...
2024-04-08 19:30:50 Starting - Preparing the instances for training...
2024-04-08 19:31:29 Downloading - Downloading input data...
2024-04-08 19:31:49 Downloading - Downloading the training image...
2024-04-08 19:32:35 Training - Training image download completed. Training in progress..2024-04-08 19:32:43.919230: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX512F, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-04-08 19:32:46,099 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2024-04-08 19:32:46,100 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2024-04-08 19:32:46,100 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)
202

## Show where the model is stored in the model registry

In [131]:
import sagemaker
import boto3

# creating a client for sagemaker
sm_boto3 = boto3.client('sagemaker')
# creating a session for sagemaker
sess = sagemaker.Session()
region = sess.boto_session.region_name

# Need to create an S3 bucket
bucket = 'martymdlregistry' # specific s3 bucket
print('Using bucket ' + bucket)

tensorflow_estimator.latest_training_job.wait(logs="None")

artifact = sm_boto3.describe_training_job(
    TrainingJobName=tensorflow_estimator.latest_training_job.name
)["ModelArtifacts"]["S3ModelArtifacts"]
print("Model artifact persisted at " + artifact)


Using bucket martymdlregistry

2024-04-08 19:33:06 Starting - Preparing the instances for training
2024-04-08 19:33:06 Downloading - Downloading the training image
2024-04-08 19:33:06 Training - Training image download completed. Training in progress.
2024-04-08 19:33:06 Uploading - Uploading generated training model
2024-04-08 19:33:06 Completed - Training job completed
Model artifact persisted at s3://martymdlregistry/sagemaker/truck-break-off/models/deep_learning/truck-breakoff-rl-markov-2024-04-08-19-30-32-177/output/model.tar.gz


## Define Capbility for Deployment

We want to keep a copy so that we can deploy a specific model at an endpoint (App)

In [132]:
from sagemaker.tensorflow.model import TensorFlowModel
from time import gmtime, strftime # type: ignore

model_name = "truckBreakOffModelDeepLearning" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
image_uri = "763104351884.dkr.ecr.us-east-1.amazonaws.com/tensorflow-training:2.12.0-cpu-py310-ubuntu20.04-sagemaker"

tensorflow_model = TensorFlowModel(model_data=artifact,
                                    role="arn:aws:iam::174023208515:role/sagemaker-truck-break-off-role",
                                    framework_version="2.7.0",
                                    image_uri=image_uri)

endpoint_name = "truckBreakOffModelDeepLearningEndpoint-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())

predictor = tensorflow_model.deploy(initial_instance_count=1,
                                    instance_type="ml.m4.xlarge",
                                    endpoint_name=endpoint_name)


INFO:sagemaker:Creating model with name: tensorflow-training-2024-04-08-19-34-20-942
INFO:sagemaker:Creating endpoint-config with name truckBreakOffModelDeepLearningEndpoint-2024-04-08-19-34-18
INFO:sagemaker:Creating endpoint with name truckBreakOffModelDeepLearningEndpoint-2024-04-08-19-34-18


--------------*

UnexpectedStatusException: Error hosting endpoint truckBreakOffModelDeepLearningEndpoint-2024-04-08-19-34-18: Failed. Reason: CannotStartContainerError. Please ensure the model container for variant AllTraffic starts correctly when invoked with 'docker run <image> serve'. Try changing the instance type or reference the troubleshooting page https://docs.aws.amazon.com/sagemaker/latest/dg/async-inference-troubleshooting.html

In [114]:
tensorflow_model

<sagemaker.tensorflow.estimator.TensorFlow at 0x30f8406a0>

In [None]:
predictor

### Shows sagemaker predictor based on the Deep Learning Algo

In [117]:
endpoint_name

'truckBreakOffModelDeepLearning-2024-04-08-19-01-23'

## Test and Predict

# Delete Endpoint
Having endpoints will incur cost overtime if this is just an experiment. Endpoints will help manage the live applications. You will need to determine what endpoints are in production or in experiment/staging

In [None]:
sm_boto3.delete_endpoint(EndpointName=endpoint_name)