
## 2c. Evidence - Reliability

Evidence collected in this section checks the Reliability example: how often the actions chosen by the agent make progress towards the goal.

In [None]:
# Description of the reliability requirement examined in this notebook.
# This is a bare dict expression: it is displayed as the cell output and is
# not bound to any name.
{
    "tags": ["Reinforcement Learning"],
    "quality_attribute": "Model outputs improve progress towards goal 99.9% of the time.",
    "description": "Model receives valid values from sensors during Normal Operation and produces outputs (actions) which improve the expected reward 99.9% of the time.  ",
    "inputs": "Initial random start position",
    "output": "Log with 1 for action determined to make progress, and 0 for those that do not.",
}

In [None]:
# Identifier passed to ExternalMeasurement for the evidence produced below.
MEASURE_NAME = "reliability"
# Number of episodes that test_reliability() runs.
NUM_TRIALS = 100

### Initialize MLTE Context

MLTE contains a global context that manages the currently active _session_. Initializing the context tells MLTE how to store all of the artifacts that it produces. This import will also set up global constants related to folders and model to use.

In [None]:
# Sets up context for the model being used, sets up constants related to folders and model data to be used.
from session import *

In [None]:
import numpy as np
import gymnasium as gym

In [None]:
# Create the MountainCar-v0 environment used by every trial below.
env = gym.make("MountainCar-v0", render_mode="rgb_array")
# Reset once to obtain an initial observation.
# NOTE(review): no seed is passed to reset(), so start states are presumably
# not reproducible between runs — confirm whether that is intended.
state, info = env.reset()

In [None]:
# Discretize the state space (position, velocity):
# 20 evenly spaced bin edges per dimension.
position_bins = np.linspace(-1.2, 0.6, 20)
velocity_bins = np.linspace(-0.07, 0.07, 20)

# Load the saved Q-table from DATA_DIR (it is read from disk, not initialized here).
# NOTE(review): `os` and `DATA_DIR` are not imported in the visible cells;
# presumably they come from `from session import *` — confirm.
q_table = np.load(os.path.join(DATA_DIR, "mountain_car.npy"))


# Discretize the continuous state (position and velocity)
def discretize_state(state):
    """Convert a continuous observation into a pair of bin indices.

    Args:
        state: Two-element sequence ``(position, velocity)``.

    Returns:
        Tuple ``(position_idx, velocity_idx)`` locating the observation
        within ``position_bins`` and ``velocity_bins``.
    """
    pos, vel = state
    # np.digitize counts bins from 1 for values at or above the first edge,
    # so subtract 1 to obtain 0-based indices.
    pos_bin = np.digitize(pos, position_bins) - 1
    vel_bin = np.digitize(vel, velocity_bins) - 1
    return pos_bin, vel_bin


# Greedy action selection (no exploration: always the arg-max of the Q-table)
def choose_action(state):
    """Return the index of the highest-valued action for ``state``.

    The observation is discretized with ``discretize_state`` and the
    resulting bin indices are used to look up the action values in
    ``q_table``.
    """
    pos_bin, vel_bin = discretize_state(state)
    action_values = q_table[pos_bin, vel_bin]
    return np.argmax(action_values)

## Reliability

Agent receives valid values from sensors during Normal Operation. Agent produces actions which improve the expected reward 99.9% of the time.

In [None]:
def evaluate_action(state, action):
    """Check whether ``action`` is the expected move for ``state``.

    The expected move is chosen from the sign of the velocity and from which
    side of position 0.1 the car is on. Actions are compared against 0 and 2
    (presumably "push left" and "push right" in MountainCar — confirm).

    Args:
        state: Two-element sequence ``(position, velocity)``.
        action: The action that was taken in ``state``.

    Returns:
        ``True`` if ``action`` is the expected one, ``False`` if it is not,
        and ``-1`` when no rule covers the state (position exactly 0.1,
        velocity exactly 0, or position > 0.1 with negative velocity).
    """
    position, velocity = state

    # The built-in bool() is used instead of np.bool, which does not exist in
    # NumPy 1.24-1.26 and would raise AttributeError there.
    if position < 0.1 and velocity < 0:
        return bool(action == 0)
    if position < 0.1 and velocity > 0:
        return bool(action == 2)
    if position > 0.1 and velocity > 0:
        # NOTE(review): expecting action 0 while the car is already moving in
        # the positive direction looks inconsistent with the rule above. A
        # former fourth branch repeated the `position < 0.1, velocity > 0`
        # test and could never run, so one of the rules was likely meant to
        # cover `position > 0.1, velocity < 0`. Behavior is kept as it was;
        # confirm the intended rule before relying on this evidence.
        return bool(action == 0)
    return -1

In [None]:
def test_reliability():
    """Run ``NUM_TRIALS`` episodes and log whether each action was expected.

    Every episode starts from ``env.reset()`` and follows the action returned
    by ``choose_action`` until the environment reports that the episode is
    over. Each step is scored with ``evaluate_action``.

    Returns:
        list[int]: One entry per scored step across all trials: 1 when
        ``evaluate_action`` accepted the action, 0 when it rejected it.
        Steps for which ``evaluate_action`` returns -1 are not logged.
    """
    test_results = []

    for trial in range(NUM_TRIALS):
        state, _ = env.reset()
        episode_over = False

        while not episode_over:
            # Action picked from the loaded Q-table (not random).
            action = choose_action(state)

            # Take the action and get the next state and the end-of-episode flags.
            next_state, _, terminated, truncated, _ = env.step(action)

            # An episode ends when it terminates OR is truncated. Looping on
            # `terminated` alone never exits if the episode is cut off
            # (e.g. by a step limit) before the goal is reached.
            episode_over = terminated or truncated

            # Score the action against the state it was chosen in.
            verdict = evaluate_action(state, action)
            if verdict != -1:
                test_results.append(1 if verdict else 0)

            # Update the state for the next iteration
            state = next_state
        print(f"Completed trial {trial}")

    return test_results

In [None]:
from mlte.evidence.types.array import Array
from mlte.measurement.external_measurement import ExternalMeasurement

# Measure reliability: wrap test_reliability as an external measurement whose
# result is stored as an Array. The identifier (MEASURE_NAME) has to be the
# same one defined in the TestSuite.
# NOTE(review): the variable name mentions "position compliance" although the
# measurement is the reliability one — looks like a leftover name.
position_compliance_measurement = ExternalMeasurement(
    MEASURE_NAME, Array, test_reliability
)
# presumably evaluate() calls test_reliability and wraps its return value — confirm
evidence = position_compliance_measurement.evaluate()

# Inspect value
print(evidence)

# Save to artifact store
evidence.save(force=True, parents=True)