# ModelOps
The following steps save the trained model to an S3 bucket so that the bucket can optionally serve as a model registry.

In [2]:
%%writefile truck_breakoff_rl_markov.py

import numpy as np
import tensorflow as tf
import pandas as pd
import os
import random
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score
import joblib
import pathlib
from io import StringIO
import argparse
import joblib

# Loads the persisted model artifact from the given model directory.
def model_fn(model_dir):
    """Deserialize and return the object stored as ``model.joblib``.

    Args:
        model_dir: Directory that contains the ``model.joblib`` artifact.

    Returns:
        Whatever object ``joblib.load`` reconstructs from that file.
    """
    artifact_path = os.path.join(model_dir, "model.joblib")
    return joblib.load(artifact_path)


def reinforcement_model(self, num_episodes=1000, learning_rate=0.1,
                        discount_factor=0.95, epsilon=0.1):
    """Learn a Q-table for a fixed two-state Markov chain with Q-learning.

    The Q-table is a ``tf.Variable`` of shape ``(num_states, num_actions)``
    trained with SGD on the squared temporal-difference error.

    Args:
        self: Not read by this function. The script entry point passes the
            model directory in this position.
        num_episodes: Number of episodes to run.
        learning_rate: Step size of the SGD optimizer.
        discount_factor: Discount applied to the best next-state Q-value.
        epsilon: Probability of picking a random action instead of the
            greedy one.

    Returns:
        numpy.ndarray: The learned Q-values, shape ``(num_states, num_actions)``.

    Side effects:
        Saves the Q-table variable with ``tf.saved_model.save`` and prints the
        learned Q-values.
    """
    # Transition probabilities of the Markov chain: row = current state,
    # column = next state.
    transition_matrix = np.array([[0.9, 0.1],
                                  [0.3, 0.7]])

    # Reward received for taking `action` (column) in `state` (row).
    reward_matrix = np.array([[10, -1],
                              [-1, 10]])

    num_states = transition_matrix.shape[0]
    num_actions = transition_matrix.shape[1]

    # Q-table, initialised with small random values.
    W = tf.Variable(tf.random.uniform([num_states, num_actions], 0, 0.01))
    optimizer = tf.optimizers.SGD(learning_rate=learning_rate)

    for episode in range(num_episodes):
        state = np.random.randint(0, num_states)  # Start at a random state
        while True:
            # Encode the current state up front so that both the action
            # choice and the training step use the state of *this* step,
            # regardless of which epsilon-greedy branch is taken.
            one_hot_state = tf.reshape(tf.one_hot(state, num_states), [1, -1])

            # Choose action (epsilon-greedy)
            if np.random.rand() < epsilon:
                action = np.random.randint(0, num_actions)
            else:
                action = tf.argmax(tf.matmul(one_hot_state, W), 1).numpy()[0]

            # Sample the next state from the chain and look up the reward.
            next_state = np.random.choice(range(num_states), p=transition_matrix[state])
            hot_next_state = tf.reshape(tf.one_hot(next_state, num_states), [1, -1])
            reward = float(reward_matrix[state, action])

            # Best achievable Q-value from the next state.
            max_Q_next = tf.reduce_max(tf.matmul(hot_next_state, W))

            # The target starts from the current state's own Q-values so that
            # only the entry of the action actually taken differs from the
            # prediction and contributes to the loss. It is computed outside
            # the tape, so it is treated as a constant.
            target_Q_values = tf.tensor_scatter_nd_update(
                tf.matmul(one_hot_state, W),
                [[0, action]],
                [reward + discount_factor * max_Q_next],
            )

            # Train Q-network
            with tf.GradientTape() as tape:
                Q_values = tf.matmul(one_hot_state, W)
                loss = tf.reduce_sum(tf.square(target_Q_values - Q_values))

            gradients = tape.gradient(loss, [W])
            optimizer.apply_gradients(zip(gradients, [W]))

            state = next_state
            if state == 0:  # State 0 is treated as terminal and ends the episode
                break

    # Save the learned model in model directory
    # NOTE(review): this path is hard-coded and does not use the `self`
    # argument; confirm this is the intended output location.
    tf.saved_model.save(W, '../model/truck_break_off_model')

    # Print the learned Q-values
    learned_q_values = W.numpy()
    print("Learned Q-values:")
    print(learned_q_values)
    return learned_q_values

 def evaluate_model(self, model):
    # Evaluate the model on the test set
    num_states = self.transition_matrix.shape[0]
    num_actions = self.transition_matrix.shape[1]
    num_features = 7  # Number of features in your input data
    correct_predictions = 0
    for index, row in self.test.iterrows():
        state = int(row['TRUCK_BREAK_OFF'])  # Convert state to integer
        one_hot_state = tf.reshape(tf.one_hot(state, num_states), [1, -1])
        action = tf.argmax(tf.matmul(one_hot_state, model), 1).numpy()[0]
        # Assuming action 0 corresponds to no truck break off, action 1 corresponds to truck break off
        predicted_break_off = action
        true_break_off = row['LABEL']
        if predicted_break_off == true_break_off:
            correct_predictions += 1

    manual_calc_accuracy = correct_predictions / len(self.test)
    print("Manual calculation accuracy:", manual_calc_accuracy)

    # Evaluate the model using sklearn metrics
    y_true = self.test['LABEL']
    y_pred = []
    for index, row in self.test.iterrows():
        state = int(row['ROUTEID'])  # Convert state to integer
        one_hot_state = tf.reshape(tf.one_hot(state, num_states), [1, -1])
        action = tf.argmax(tf.matmul(one_hot_state, model), 1).numpy()[0]
        # Assuming action 0 corresponds to no truck break off, action 1 corresponds to truck break off
        predicted_break_off = action
        y_pred.append(predicted_break_off)
    accuracy = accuracy_score(y_true, y_pred)
    report = classification_report(y_true, y_pred, zero_division=1)  # Set zero_division parameter
    confusion = confusion_matrix(y_true, y_pred)
    precision = precision_score(y_true, y_pred, zero_division=1)  # Set zero_division parameter
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    print("Accuracy:", accuracy)
    print("Classification Report:\n", report)
    print("Confusion Matrix:\n", confusion)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)


if __name__ == "__main__":
    # Script entry point: parse arguments, read the train/test CSV files,
    # train the model and persist the result as model.joblib in model_dir.
    print("[INFO] Extracting arguements...")
    parser = argparse.ArgumentParser()

    # Hyperparameters sent by the client are passed as command-line arguments to the script.
    parser.add_argument("--num_episodes", type=int, default=1000)
    parser.add_argument("--learning_rate", type=float, default=0.1)
    parser.add_argument("--discount_factor", type=float, default=0.95)
    parser.add_argument("--epsilon", type=float, default=0.1)
    # The Markov chain defined in reinforcement_model is 2x2, hence the defaults of 2.
    parser.add_argument("--num_states", type=int, default=2)
    parser.add_argument("--num_actions", type=int, default=2)
    parser.add_argument("--num_features", type=int, default=7)

    # Data, model, and output directories.
    # Inside a SageMaker training job these come from environment variables.
    # A channel named "train" is exposed as SM_CHANNEL_TRAIN and one named
    # "training" as SM_CHANNEL_TRAINING, so both spellings are accepted.
    parser.add_argument("--model_dir", type=str, default=os.environ.get("SM_MODEL_DIR"))
    parser.add_argument(
        "--train",
        type=str,
        default=os.environ.get("SM_CHANNEL_TRAIN", os.environ.get("SM_CHANNEL_TRAINING")),
    )
    parser.add_argument(
        "--test",
        type=str,
        default=os.environ.get("SM_CHANNEL_TEST", os.environ.get("SM_CHANNEL_TESTING")),
    )

    # test/train files
    parser.add_argument("--train_file", type=str, default="train-V1.csv")
    parser.add_argument("--test_file", type=str, default="test-V1.csv")

    args, _ = parser.parse_known_args()

    # Fail early with a readable message instead of a TypeError from
    # os.path.join when a directory is neither passed nor set in the environment.
    missing_dirs = [name for name in ("model_dir", "train", "test") if getattr(args, name) is None]
    if missing_dirs:
        parser.error(
            "missing required location(s): "
            + ", ".join("--" + name for name in missing_dirs)
        )

    print("[INFO] Reading data...")
    print()
    train_df = pd.read_csv(os.path.join(args.train, args.train_file))
    test_df = pd.read_csv(os.path.join(args.test, args.test_file))

    print("[INFO] Building Training & Testing Datasets...")
    print()
    # Assumes the target column is 'LABEL' (the column evaluate_model reads)
    # and that every other column is a feature — TODO confirm.
    label = "LABEL"
    features = [column for column in train_df.columns if column != label]
    X_train = train_df[features]
    y_train = train_df[label]
    X_test = test_df[features]
    y_test = test_df[label]

    print("[INFO] Training Model...")
    print()
    model_dir = args.model_dir
    # reinforcement_model is called with the model directory only; the parsed
    # hyperparameters are not forwarded to it.
    training_result = reinforcement_model(model_dir)

    # Persist what training returned. If it returned nothing, fall back to
    # pickling the training function itself so that an artifact is still written.
    model_path = os.path.join(model_dir, "model.joblib")
    artifact = training_result if training_result is not None else reinforcement_model
    joblib.dump(artifact, model_path)
    print("Model saved at: {}".format(model_path))
    print()

    # Round-trip check: the artifact can only be loaded once it has been written.
    model_fn(model_dir)

    # NOTE(review): evaluate_model is never called, so none of the metric
    # lines it prints will appear in the training log.
    print("[INFO] Model Training Complete...")


Overwriting truck_breakoff_rl_markov.py


### Role input & Sagemaker SKlearn
The SageMaker execution role must be obtained from IAM. In this instance we reused an existing SageMaker execution role to enable this functionality.

In [8]:
from sagemaker.sklearn.estimator import SKLearn

FRAMEWORK_VERSION = "0.23-1"

# Create an SKLearn estimator that runs truck_breakoff_rl_markov.py as a
# managed spot training job.
# NOTE(review): the entry point imports tensorflow; confirm that it is
# available in the scikit-learn framework image used here.
sklearn_estimator = SKLearn(
    entry_point="truck_breakoff_rl_markov.py",
    role="arn:aws:iam::174023208515:role/service-role/AmazonSageMaker-ExecutionRole-20240321T161040",
    instance_count=1,
    instance_type="ml.m5.xlarge",
    framework_version=FRAMEWORK_VERSION,
    base_job_name="truck-breakoff-rl-markov",
    # Forwarded to the entry point as command-line arguments.
    hyperparameters={
        "num_episodes": 1000,
        "learning_rate": 0.1,
        "discount_factor": 0.95,
        "epsilon": 0.1,
        "num_states": 2,
        "num_actions": 2,
        "num_features": 4,
    },
    use_spot_instances=True,
    max_wait = 7200,
    max_run = 3600,
    # Each regex must match a line the entry point prints. The script prints
    # "F1 Score: <value>", so the f1 pattern has to include the word "Score".
    metric_definitions=[
        {"Name": "accuracy", "Regex": "Accuracy: ([0-9.]+).*$"},
        {"Name": "precision", "Regex": "Precision: ([0-9.]+).*$"},
        {"Name": "recall", "Regex": "Recall: ([0-9.]+).*$"},
        {"Name": "f1", "Regex": "F1 Score: ([0-9.]+).*$"},
    ],
)


## Training the Model

In [10]:
# Start the training job asynchronously: wait=False returns right after the
# job has been submitted instead of blocking until it finishes.
train_path = "s3://martymdlregistry/sagemaker/truck-break-off-rl_markov/train-V1.csv"
test_path = "s3://martymdlregistry/sagemaker/truck-break-off-rl_markov/test-V1.csv"
input_channels = {
    "train": train_path,
    "test": test_path,
}
sklearn_estimator.fit(inputs=input_channels, wait=False)


INFO:sagemaker:Creating training-job with name: truck-breakoff-rl-markov-2024-04-04-05-31-41-957
