In [4]:
# Preliminaries
import sys
import json
import os
from pathlib import Path
import pickle
from typing import Tuple

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

def load_data(
    test_size: float = 0.2, random_state: int = 42
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]:
    """
    Load the iris dataset and split it into training and test partitions.

    :param test_size: Fraction of the samples placed in the test partition
    :param random_state: Seed for the shuffle, so that repeated calls
        (and a Restart & Run All of the notebook) yield the same split
    :return (X_train, X_test, y_train, y_test)
    """
    iris = load_iris(as_frame=True)
    X, y = iris.data, iris.target
    # train_test_split returns a list ordered as X_train, X_test, y_train, y_test;
    # unpack it so the declared tuple type is what callers actually receive.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test

def load_model():
    """
    Unpickle the model stored at ``MODELS_DIR / "model_demo.pkl"``.
    :return The object deserialized from the pickle file
    """
    # pickle can execute arbitrary code while loading; only use trusted files.
    model_file = MODELS_DIR / "model_demo.pkl"
    with open(model_file, "rb") as handle:
        model = pickle.load(handle)
    return model

def package_root() -> str:
    """
    Build the absolute path of the ``src`` directory that sits next to the
    current working directory (i.e. ``<cwd>/../src``, normalized).
    :return The normalized absolute path as a string
    """
    parent_dir = os.path.join(os.getcwd(), os.pardir)
    source_dir = os.path.join(parent_dir, "src/")
    # abspath also normalizes the path, collapsing ".." and the trailing slash.
    return os.path.abspath(source_dir)

# Make both the package sources and the notebook directory importable
# before importing mlte (and, later, local modules next to this notebook).
for _search_path in (package_root(), os.getcwd()):
    sys.path.append(_search_path)

import mlte

# Local artifact store lives in ./store next to the notebook.
store_path = os.path.join(os.getcwd(), "store")
os.makedirs(store_path, exist_ok=True)

# Configure the global MLTE context: model identity and where artifacts go.
mlte.set_model("IrisClassifier", "0.0.1")
mlte.set_artifact_store_uri(f"local://{store_path}")


# Working directories used by the rest of the notebook.
DATASETS_DIR = Path.cwd() / "data"
MODELS_DIR = Path.cwd() / "models"
MEDIA_DIR = Path.cwd() / "media"

# Create each directory if it is missing; existing ones are left untouched.
for _directory in (DATASETS_DIR, MODELS_DIR, MEDIA_DIR):
    _directory.mkdir(parents=True, exist_ok=True)



In [6]:
from mlte.spec import DetailedSpec

from mlte.property.costs import (
    StorageCost,
    TrainingComputeCost,
    TrainingMemoryCost
)
from mlte.property.functionality import TaskEfficacy

from mlte.measurement.cpu import LocalProcessCPUUtilization
from mlte.measurement.storage import LocalObjectSize
from mlte.measurement import ExternalMeasurement
from confusion_matrix import ConfusionMatrix
from mlte.measurement.result import Real
from mlte.spec import Condition

# Conditions grouped by the property they will be attached to. Each one
# pairs a named measurement with a validator name and a threshold.
efficacy_conditions = [
    Condition(ExternalMeasurement("accuracy", Real), "greater_or_equal_to", 0.9),
    Condition(
        ExternalMeasurement("confusion matrix", ConfusionMatrix),
        "misclassification_count_less_than",
        2,
    ),
]
storage_conditions = [
    Condition(LocalObjectSize("model size"), "less_than", 3000),
]
compute_conditions = [
    Condition(
        LocalProcessCPUUtilization("training cpu"),
        "max_utilization_less_than",
        5.0,
    ),
]

# TrainingMemoryCost is imported above but currently not part of the spec
# (its entry was commented out).
spec = DetailedSpec(
    {
        TaskEfficacy(): efficacy_conditions,
        StorageCost(): storage_conditions,
        TrainingComputeCost(): compute_conditions,
    }
)
spec.save()

# Disabled alternative kept for reference: build the spec from bare
# properties and attach conditions afterwards, e.g.
#   spec.add_condition("TaskEfficacy", Condition(ExternalMeasurement("accuracy", Real), "greater_or_equal_to", 0.9))
spec_document = spec.generate_document()
print(json.dumps(spec_document, indent=4))

{
    "schema_version": "0.0.1",
    "metadata": {
        "model_identifier": "IrisClassifier",
        "model_version": "0.0.1",
        "timestamp": 1679502469
    },
    "properties": [
        {
            "name": "TaskEfficacy",
            "description": "The TaskEfficacy property assesses a model's ability to correctly perform instances of its task. The means of measurement for this property will vary by both domain and task. Examples include accuracy, error rate, and average precision, but many others are possible.",
            "measurements": [
                {
                    "name": "accuracy",
                    "type": "ExternalMeasurement",
                    "validator": "greater_or_equal_to",
                    "threshold": 0.9
                },
                {
                    "name": "confusion matrix",
                    "type": "ExternalMeasurement",
                    "validator": "misclassification_count_less_than",
                    "threshol

In [3]:
# Imports used by this cell, grouped in one place instead of being
# scattered between the measurement steps.
from sklearn.metrics import accuracy_score, confusion_matrix

from confusion_matrix import ConfusionMatrix
from mlte.measurement import ProcessMeasurement
from mlte.measurement.cpu import CPUStatistics
from mlte.measurement.result import Integer, Real

# --- Training CPU utilization ---------------------------------------------
# Launch train.py as a separate process and hand it to the CPU measurement.
script = Path.cwd() / "train.py"
args = [
    "--dataset-dir", str(DATASETS_DIR.absolute()),
    "--models-dir", str(MODELS_DIR.absolute()),
]
cpu_measurement = spec.get_measurement("training cpu")
cpu_stats: CPUStatistics = cpu_measurement.evaluate(
    ProcessMeasurement.start_script(script, args)
)
spec.add_result(cpu_stats)

# --- Model size -------------------------------------------------------------
# Measure the same artifact that load_model() reads ("model_demo.pkl") so the
# recorded size belongs to the model that is actually evaluated below.
# NOTE(review): this previously pointed at "model.pkl"; confirm which file
# name train.py writes.
storage_measurement = spec.get_measurement("model size")
size: Integer = storage_measurement.evaluate(MODELS_DIR / "model_demo.pkl")
spec.add_result(size)

# --- Predictive performance -------------------------------------------------
# NOTE(review): load_data() creates its own train/test split here; verify it
# matches the split train.py used, otherwise test rows may have been seen
# during training.
_, X_test, _, y_test = load_data()
model = load_model()
y_pred = model.predict(X_test.to_numpy())

accuracy_measurement = spec.get_measurement("accuracy")
accuracy: Real = accuracy_measurement.evaluate(accuracy_score(y_test, y_pred))
spec.add_result(accuracy)

matrix_measurement = spec.get_measurement("confusion matrix")
matrix: ConfusionMatrix = matrix_measurement.evaluate(
    confusion_matrix(y_test, y_pred)
)
spec.add_result(matrix)

# --- Validation -------------------------------------------------------------
# Check every collected result against its condition and print the bound spec.
results = spec.validate_properties()
bound_spec = spec.generate_bound_spec(results)
print(json.dumps(bound_spec.document, indent=4))

{
    "schema_version": "0.0.1",
    "metadata": {
        "model_identifier": "IrisClassifier",
        "model_version": "0.0.1",
        "timestamp": 1679436756
    },
    "properties": [
        {
            "name": "TaskEfficacy",
            "description": "The TaskEfficacy property assesses a model's ability to correctly perform instances of its task. The means of measurement for this property will vary by both domain and task. Examples include accuracy, error rate, and average precision, but many others are possible.",
            "measurements": [
                {
                    "name": "ExternalMeasurement",
                    "validators": [
                        {
                            "name": "",
                            "result": "Success",
                            "message": "Real magnitude 0.9666666666666667 greater than or equal to threshold 0.9"
                        },
                        {
                            "name": "",
          