# ZenML Pipelines for SKLearn

In [None]:
!pip install zenml
!zenml integration install sklearn

## Initialize the Project

In [1]:
# Connect the ZenML client to the server at the given URL.
# NOTE(review): the server URL, username and password are hardcoded in this
# cell and are saved with the notebook — consider supplying them from
# environment variables before sharing or committing it.
!zenml connect --url http://192.168.2.110:8888 --username admin --password zenml

[1;35mNumExpr defaulting to 8 threads.[0m
[2;36mConnecting to: [0m[2;32m'http://192.168.2.110:8888'[0m[2;33m...[0m
[1;35mUpdated the global store configuration.[0m


In [None]:
# Start from a clean slate: delete any existing .zen directory in the current
# working directory, then run `zenml init` again.
!rm -rf .zen
!zenml init

## Build a scikit-learn SVC Image Classifier

In [3]:
import numpy as np
from sklearn.base import ClassifierMixin
from sklearn.svm import SVC
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

In [17]:
def train_test():
    """Fit an SVC on the sklearn digits dataset and print its test accuracy.

    Every image is flattened into a single feature row, the samples are split
    80/20 without shuffling, and the accuracy on the held-out 20% is printed.
    Nothing is returned.
    """
    digits = load_digits()
    n_samples = len(digits.images)
    # One row per image; -1 collapses the remaining image axes into one.
    features = digits.images.reshape((n_samples, -1))

    # shuffle=False splits the samples in their stored order.
    train_features, test_features, train_labels, test_labels = train_test_split(
        features, digits.target, test_size=0.2, shuffle=False
    )

    # fit() returns the fitted estimator, so construction and training chain.
    classifier = SVC(gamma=0.001).fit(train_features, train_labels)
    score = classifier.score(test_features, test_labels)
    print(f"Test Accuracy: {score}")

In [18]:
# Baseline without ZenML: train the SVC and print its test accuracy.
train_test()

Test Accuracy: 0.9583333333333334


## Run the Classifier using a ZenML Pipeline

### Define Steps

Define the classifier as three ZenML pipeline steps: __Data Loading__, __Model Training__, and __Model Evaluation__.

In [4]:
from zenml import step
from typing_extensions import Annotated
from typing import Tuple

In [5]:
@step
def importer() -> Tuple[
    Annotated[np.ndarray, "X_train"],
    Annotated[np.ndarray, "X_test"],
    Annotated[np.ndarray, "y_train"],
    Annotated[np.ndarray, "y_test"],
]:
    """Load the sklearn digits dataset and split it into train and test arrays.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``: flattened image
        features and their targets, split 80/20 without shuffling.
    """
    digits = load_digits()
    # One row per image; -1 collapses the remaining image axes into one.
    flat_images = digits.images.reshape((len(digits.images), -1))
    train_features, test_features, train_labels, test_labels = train_test_split(
        flat_images, digits.target, test_size=0.2, shuffle=False
    )
    return train_features, test_features, train_labels, test_labels

[1;35mNumExpr defaulting to 8 threads.[0m


In [6]:
@step
def svc_trainer(X_train: np.ndarray, y_train: np.ndarray) -> ClassifierMixin:
    """Fit an SVC (gamma=0.001) on the training data.

    Args:
        X_train: Training feature array.
        y_train: Training target array.

    Returns:
        The fitted SVC classifier.
    """
    # fit() returns the fitted estimator itself, so it can be returned directly.
    return SVC(gamma=0.001).fit(X_train, y_train)

In [7]:
@step
def evaluator(X_test: np.ndarray, y_test: np.ndarray, model: ClassifierMixin) -> float:
    """Score the trained model on the test set.

    Args:
        X_test: Test feature array.
        y_test: Test target array.
        model: Fitted classifier to evaluate.

    Returns:
        The value of ``model.score(X_test, y_test)``, which is also printed.
    """
    score = model.score(X_test, y_test)
    print(f"Test Accuracy: {score}")
    return score

### Define Pipeline of Steps

In [8]:
from zenml import pipeline

In [9]:
@pipeline
def digits_classifier():
    """Wire the importer, svc_trainer and evaluator steps into one pipeline.

    The importer's train outputs feed the trainer; its test outputs and the
    trained model feed the evaluator.
    """
    X_train, X_test, y_train, y_test = importer()
    trained_model = svc_trainer(X_train=X_train, y_train=y_train)
    evaluator(X_test=X_test, y_test=y_test, model=trained_model)

### Run the Pipeline

In [10]:
# Calling the @pipeline-decorated function starts a pipeline run, as this
# cell's log output shows.
classifier = digits_classifier()

[1;35mInitiating a new run for the pipeline: [0m[1;36mdigits_classifier[1;35m.[0m
[1;35mRegistered new version: [0m[1;36m(version 1)[1;35m.[0m
[1;35mExecuting a new run.[0m
[1;35mUsing user: [0m[1;36madmin[1;35m[0m
[1;35mUsing stack: [0m[1;36mdefault[1;35m[0m
[1;35m  orchestrator: [0m[1;36mdefault[1;35m[0m
[1;35m  artifact_store: [0m[1;36mdefault[1;35m[0m
[1;35mStep [0m[1;36mimporter[1;35m has started.[0m
[1;35mStep [0m[1;36mimporter[1;35m has finished in [0m[1;36m3.163s[1;35m.[0m
[1;35mStep [0m[1;36msvc_trainer[1;35m has started.[0m
[1;35mStep [0m[1;36msvc_trainer[1;35m has finished in [0m[1;36m0.644s[1;35m.[0m
[1;35mStep [0m[1;36mevaluator[1;35m has started.[0m
Test Accuracy: 0.9583333333333334
[1;35mStep [0m[1;36mevaluator[1;35m has finished in [0m[1;36m0.582s[1;35m.[0m
[1;35mRun [0m[1;36mdigits_classifier-2023_09_27-08_22_04_011726[1;35m has finished in [0m[1;36m6.684s[1;35m.[0m
[1;35mDashboard URL: htt

## Visualize the Results

Head over to the dashboard URL given above to see the visualization of the pipeline run:

![ZenML Pipelines for SKLearn](../assets/ZenML_MLFlow_01.webp)