In [3]:
import logging
from typing import Tuple

import pandas as pd
from sklearn.base import ClassifierMixin
from sklearn.datasets import load_digits, load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from typing_extensions import Annotated  # or `from typing import Annotated` on Python 3.9+

from zenml import pipeline, step

@step
def training_data_loader() -> Tuple[
    Annotated[pd.DataFrame, "X_train"],
    Annotated[pd.DataFrame, "X_test"],
    Annotated[pd.Series, "y_train"],
    Annotated[pd.Series, "y_test"],
]:
    """Fetch the iris dataset and split it into train and test partitions.

    Returns:
        A 4-tuple ``(X_train, X_test, y_train, y_test)``. 20% of the rows are
        held out for testing; the split is shuffled with a fixed seed (42).
    """
    logging.info("Loading iris...")
    dataset = load_iris(as_frame=True)

    logging.info("Splitting train and test...")
    partitions = train_test_split(
        dataset.data,
        dataset.target,
        test_size=0.2,
        shuffle=True,
        random_state=42,
    )
    # Unpack explicitly so the step returns a tuple in the annotated order.
    X_train, X_test, y_train, y_test = partitions
    return X_train, X_test, y_train, y_test


@step(enable_cache=False)
def svc_trainer(
    X_train: pd.DataFrame,
    y_train: pd.Series,
    gamma: float = 0.001,
) -> Tuple[
    Annotated[ClassifierMixin, "trained_model"],
    Annotated[float, "training_acc"],
]:
    """Fit an sklearn SVC on the training data and report its training accuracy.

    Args:
        X_train: Training features.
        y_train: Training labels.
        gamma: Value forwarded to ``SVC(gamma=...)``.

    Returns:
        A pair ``(trained_model, training_acc)``: the fitted classifier and its
        score on the same data it was trained on.
    """
    # Convert once and reuse the arrays for both fitting and scoring.
    features = X_train.to_numpy()
    labels = y_train.to_numpy()

    classifier = SVC(gamma=gamma)
    classifier.fit(features, labels)

    accuracy = classifier.score(features, labels)
    print(f"Train accuracy: {accuracy}")

    return classifier, accuracy


@step
def digits_data_loader() -> Tuple[
    Annotated[pd.DataFrame, "X_train"],
    Annotated[pd.DataFrame, "X_test"],
    Annotated[pd.Series, "y_train"],
    Annotated[pd.Series, "y_test"],
]:
    """Load the digits dataset and split it into train and test data.

    Returns:
        A 4-tuple ``(X_train, X_test, y_train, y_test)``. 20% of the rows are
        held out for testing.
    """
    # `load_digits` must be imported from `sklearn.datasets` at the top of the
    # notebook; without that import this step raises NameError when it runs.
    digits = load_digits(as_frame=True)

    # Seed the shuffle (same seed as the iris loader) so repeated runs yield
    # the same partitions and the step's outputs are reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        digits.data,
        digits.target,
        test_size=0.2,
        shuffle=True,
        random_state=42,
    )
    return X_train, X_test, y_train, y_test


@pipeline
def digits_pipeline(gamma: float = 0.002):
    """Train the SVC classifier on the digits dataset.

    This pipeline used to be named ``first_pipeline``, but another pipeline
    with that exact name is defined immediately afterwards and silently
    replaced it, leaving this definition unreachable. It has its own name so
    both pipelines can coexist.

    Args:
        gamma: Value passed through to the ``svc_trainer`` step.
    """
    # Only the training partitions are consumed; the test split is unused here.
    X_train, _, y_train, _ = digits_data_loader()
    svc_trainer(gamma=gamma, X_train=X_train, y_train=y_train)


@pipeline
def first_pipeline(gamma: float = 0.002):
    """Load the iris data and train the SVC classifier on its training split.

    Args:
        gamma: Value passed through to the ``svc_trainer`` step.
    """
    # The test partitions are produced by the loader but not consumed here.
    train_features, _, train_labels, _ = training_data_loader()
    svc_trainer(X_train=train_features, y_train=train_labels, gamma=gamma)

In [6]:
if __name__ == "__main__":
    # Rebind the name to a copy configured with caching turned off,
    # then trigger a run of that configured pipeline.
    first_pipeline = first_pipeline.with_options(enable_cache=False)
    first_pipeline()

[1;35mInitiating a new run for the pipeline: [0m[1;36mfirst_pipeline[1;35m.[0m


In [7]:
from zenml.client import Client

# Look up the pipeline stored under the name "first_pipeline"; the returned
# model is what the following cells use to inspect runs, steps and artifacts.
pipeline_model = Client().get_pipeline("first_pipeline")

In [8]:
# Echo the fetched pipeline model so its repr is shown as the cell output.
pipeline_model

PipelineResponseModel(id=UUID('821b24df-d929-433f-bb3d-89c61212bc24'), created=datetime.datetime(2023, 7, 23, 10, 5, 56, 944174), updated=datetime.datetime(2023, 7, 23, 10, 5, 56, 944176), user=UserResponseModel(id=UUID('e7ece4b5-723e-4da4-be79-dfa7fd716705'), created=datetime.datetime(2023, 7, 22, 15, 49, 21, 737843), updated=datetime.datetime(2023, 7, 22, 15, 52, 34, 746723), name='default', full_name='', email_opted_in=True, hub_token=None, active=True, activation_token=None, teams=None, roles=None, email=None), workspace=WorkspaceResponseModel(id=UUID('16d21198-4608-42dd-bfcd-beb2145df6f3'), created=datetime.datetime(2023, 7, 22, 15, 49, 21, 353214), updated=datetime.datetime(2023, 7, 22, 15, 49, 21, 353216), name='default', description=''), name='first_pipeline', version='3', version_hash='1aeab08b326261b239b5496aacf3580e', docstring=None, spec=PipelineSpec(version='0.4', source=Source(module='pipeline', attribute='first_pipeline', type=<SourceType.USER: 'user'>), parameters={'gam

In [9]:
# List pipelines through the client (no filter arguments are passed here).
pipelines = Client().list_pipelines()

In [10]:
# Display the listing result; it is rendered as a `Page(...)` repr.
pipelines

Page(index=1, max_size=50, total_pages=1, total=4, items=[PipelineResponseModel(id=UUID('96f88f65-e0e9-4d5c-abd5-0a14bfeb727c'), created=datetime.datetime(2023, 7, 22, 15, 49, 28, 430409), updated=datetime.datetime(2023, 7, 22, 15, 49, 28, 430411), user=UserResponseModel(id=UUID('e7ece4b5-723e-4da4-be79-dfa7fd716705'), created=datetime.datetime(2023, 7, 22, 15, 49, 21, 737843), updated=datetime.datetime(2023, 7, 22, 15, 52, 34, 746723), name='default', full_name='', email_opted_in=True, hub_token=None, active=True, activation_token=None, teams=None, roles=None, email=None), workspace=WorkspaceResponseModel(id=UUID('16d21198-4608-42dd-bfcd-beb2145df6f3'), created=datetime.datetime(2023, 7, 22, 15, 49, 21, 353214), updated=datetime.datetime(2023, 7, 22, 15, 49, 21, 353216), name='default', description=''), name='my_pipeline', version='1', version_hash='63a1c9729b32d38ed4753b94039302ab', docstring=None, spec=PipelineSpec(version='0.4', source=Source(module='run', attribute='my_pipeline', 

In [11]:
# Show the list of runs recorded for this pipeline.
pipeline_model.runs

[PipelineRunResponseModel(id=UUID('1dafe204-c206-454e-9130-fc925a7ed72a'), created=datetime.datetime(2023, 7, 23, 10, 9, 5, 948570), updated=datetime.datetime(2023, 7, 23, 10, 9, 7, 762275), user=UserResponseModel(id=UUID('e7ece4b5-723e-4da4-be79-dfa7fd716705'), created=datetime.datetime(2023, 7, 22, 15, 49, 21, 737843), updated=datetime.datetime(2023, 7, 22, 15, 52, 34, 746723), name='default', full_name='', email_opted_in=True, hub_token=None, active=True, activation_token=None, teams=None, roles=None, email=None), workspace=WorkspaceResponseModel(id=UUID('16d21198-4608-42dd-bfcd-beb2145df6f3'), created=datetime.datetime(2023, 7, 22, 15, 49, 21, 353214), updated=datetime.datetime(2023, 7, 22, 15, 49, 21, 353216), name='default', description=''), name='first_pipeline-2023_07_23-10_09_05_938144', orchestrator_run_id='ee63c951-6355-410e-ae00-3f5008ec1311', schedule_id=None, enable_cache=None, start_time=datetime.datetime(2023, 7, 23, 10, 9, 5, 941776), end_time=datetime.datetime(2023, 7

In [15]:
# Inspect the steps of the first run in the list, keyed by step name.
# NOTE(review): runs[0] is presumably the most recent run — confirm the ordering.
pipeline_model.runs[0].steps

{'training_data_loader': StepRunResponseModel(id=UUID('4e1a0d21-03e8-4e73-9cba-23773dd4cb14'), created=datetime.datetime(2023, 7, 23, 10, 9, 5, 987018), updated=datetime.datetime(2023, 7, 23, 10, 9, 7, 78022), user=UserResponseModel(id=UUID('e7ece4b5-723e-4da4-be79-dfa7fd716705'), created=datetime.datetime(2023, 7, 22, 15, 49, 21, 737843), updated=datetime.datetime(2023, 7, 22, 15, 52, 34, 746723), name='default', full_name='', email_opted_in=True, hub_token=None, active=True, activation_token=None, teams=[], roles=[RoleResponseModel(id=UUID('808dc440-ecb9-4c31-83c9-7bc73bef032b'), created=datetime.datetime(2023, 7, 22, 15, 49, 19, 927002), updated=datetime.datetime(2023, 7, 22, 15, 49, 19, 927004), name='admin', permissions={<PermissionType.READ: 'read'>, <PermissionType.WRITE: 'write'>, <PermissionType.ME: 'me'>})], email=None), workspace=WorkspaceResponseModel(id=UUID('16d21198-4608-42dd-bfcd-beb2145df6f3'), created=datetime.datetime(2023, 7, 22, 15, 49, 21, 353214), updated=datetim

In [17]:
# Output artifacts of the data-loading step, keyed by output name.
pipeline_model.runs[0].steps["training_data_loader"].outputs

{'y_test': ArtifactResponseModel(id=UUID('10cde501-15fe-4411-b031-2b8c8031aaa7'), created=datetime.datetime(2023, 7, 23, 10, 9, 6, 970446), updated=datetime.datetime(2023, 7, 23, 10, 9, 6, 970448), user=UserResponseModel(id=UUID('e7ece4b5-723e-4da4-be79-dfa7fd716705'), created=datetime.datetime(2023, 7, 22, 15, 49, 21, 737843), updated=datetime.datetime(2023, 7, 22, 15, 52, 34, 746723), name='default', full_name='', email_opted_in=True, hub_token=None, active=True, activation_token=None, teams=[], roles=[RoleResponseModel(id=UUID('808dc440-ecb9-4c31-83c9-7bc73bef032b'), created=datetime.datetime(2023, 7, 22, 15, 49, 19, 927002), updated=datetime.datetime(2023, 7, 22, 15, 49, 19, 927004), name='admin', permissions={<PermissionType.READ: 'read'>, <PermissionType.WRITE: 'write'>, <PermissionType.ME: 'me'>})], email=None), workspace=WorkspaceResponseModel(id=UUID('16d21198-4608-42dd-bfcd-beb2145df6f3'), created=datetime.datetime(2023, 7, 22, 15, 49, 21, 353214), updated=datetime.datetime(2

In [20]:
# Load the "trained_model" output artifact of the svc_trainer step.
# NOTE(review): despite the variable name, svc_trainer returns a scikit-learn
# SVC, not a torch model — consider renaming this variable.
output_torch_model = pipeline_model.runs[0].steps["svc_trainer"].outputs["trained_model"].load()

In [21]:
# Show the metadata entries attached to the trained-model artifact.
pipeline_model.runs[0].steps["svc_trainer"].outputs["trained_model"].metadata

{'storage_size': RunMetadataResponseModel(id=UUID('07c6e161-67f7-4a0c-91b7-1ccc9dcaaef5'), created=datetime.datetime(2023, 7, 23, 10, 9, 7, 479446), updated=datetime.datetime(2023, 7, 23, 10, 9, 7, 479448), user=UserResponseModel(id=UUID('e7ece4b5-723e-4da4-be79-dfa7fd716705'), created=datetime.datetime(2023, 7, 22, 15, 49, 21, 737843), updated=datetime.datetime(2023, 7, 22, 15, 52, 34, 746723), name='default', full_name='', email_opted_in=True, hub_token=None, active=True, activation_token=None, teams=[], roles=[RoleResponseModel(id=UUID('808dc440-ecb9-4c31-83c9-7bc73bef032b'), created=datetime.datetime(2023, 7, 22, 15, 49, 19, 927002), updated=datetime.datetime(2023, 7, 22, 15, 49, 19, 927004), name='admin', permissions={<PermissionType.READ: 'read'>, <PermissionType.WRITE: 'write'>, <PermissionType.ME: 'me'>})], email=None), workspace=WorkspaceResponseModel(id=UUID('16d21198-4608-42dd-bfcd-beb2145df6f3'), created=datetime.datetime(2023, 7, 22, 15, 49, 21, 353214), updated=datetime.d

In [23]:
# Call visualize() on the trained-model artifact.
pipeline_model.runs[0].steps["svc_trainer"].outputs["trained_model"].visualize()