<a href="https://colab.research.google.com/github/raghulchandramouli/AI-Web-Scraper/blob/master/MLOps_Orchestrator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MLOps Pipeline with ZenML

In [None]:
# Install ZenML (with the server extra) and its scikit-learn integration.
%pip install "zenml[server]"
!zenml integration install sklearn -y
# Pin pyparsing; per the inline note this pin is needed when running on Colab.
%pip install pyparsing==2.4.2 # Model required for colab purpose

# IPython exposes the running kernel application used for the restart below.
import IPython

# Shut the kernel down and restart it (presumably so the packages installed
# above are picked up by subsequent imports).
IPython.Application.instance().kernel.do_shutdown(restart=True)

In [None]:
import os

# ngrok auth token consumed by the Colab setup cell that follows.
# Prefer supplying it through the NGROK_TOKEN environment variable so the
# secret is never saved in the notebook; the placeholder below is used only
# when that variable is unset or empty (replace it if you set the token here).
NGROK_TOKEN = os.environ.get("NGROK_TOKEN") or "YOUR_NGROK_TOKEN"

In [None]:
from zenml.environment import Environment

if Environment.in_google_colab(): # Colab set up

    # Install and authenticate ngrok
    !pip install pyngrok
    !ngrok authtoken $NGROK_TOKEN

# ZenML Setup

In [None]:
# Start from a clean state: remove any existing `.zen` in the working
# directory, then run `zenml init` there.
! rm -rf .zen
! zenml init

# Experimentation
In this example, I train a model on handwritten-digit data and then add MLOps capabilities to it.

In [None]:
import numpy as np
from sklearn.base import ClassifierMixin
from sklearn.svm import SVC
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

def train_test() -> None:
  """Fit an SVC on the scikit-learn digits data and print its test accuracy."""

  dataset = load_digits()
  sample_count = len(dataset.images)
  # Flatten every image into a single feature row.
  features = dataset.images.reshape((sample_count, -1))

  # Hold out 20% of the samples; the split is taken without shuffling.
  split = train_test_split(features, dataset.target, test_size=0.2, shuffle=False)
  X_train, X_test, y_train, y_test = split

  classifier = SVC(gamma=0.001)
  classifier.fit(X_train, y_train)
  test_acc = classifier.score(X_test, y_test)
  print(f"Test accuracy: {test_acc}")

train_test()

# Annotations used in ZenML

In [None]:
from zenml import step
from typing_extensions import Annotated
import pandas as pd
from typing import Tuple

@step
def importer() -> Tuple[
    Annotated[np.ndarray, "X_train"],
    Annotated[np.ndarray, "X_test"],
    Annotated[np.ndarray, "y_train"],
    Annotated[np.ndarray, "y_test"],
]:
  """Load the digits dataset and split it into train and test NumPy arrays.

  The outputs are annotated as ``np.ndarray`` because the data is built from
  NumPy arrays (``digits.images`` reshaped, and ``digits.target``) and the
  downstream ``svc_trainer`` and ``evaluator`` steps declare ``np.ndarray``
  inputs. Annotating them as pandas types would not match the values that
  are actually returned.

  Returns:
    A 4-tuple of named outputs ``X_train``, ``X_test``, ``y_train``,
    ``y_test``.
  """
  digits = load_digits()
  # Flatten every image into a single feature row.
  data = digits.images.reshape((len(digits.images), -1))

  # Hold out 20% of the samples; the split is taken without shuffling.
  X_train, X_test, y_train, y_test = train_test_split(
      data, digits.target, test_size=0.2, shuffle=False
  )

  return X_train, X_test, y_train, y_test


@step
def svc_trainer(
    X_train: np.ndarray,
    y_train: np.ndarray,
) -> ClassifierMixin:
  """Fit a support-vector classifier on the training split.

  Args:
    X_train: Training feature array.
    y_train: Training target array.

  Returns:
    The fitted ``SVC`` created with ``gamma=0.001``.
  """
  classifier = SVC(gamma=0.001)
  classifier.fit(X_train, y_train)
  return classifier

@step
def evaluator(
    X_test: np.ndarray,
    y_test: np.ndarray,
    model: ClassifierMixin,
) -> float:
  """Score a trained classifier on the test split.

  Args:
    X_test: Test feature array.
    y_test: Test target array.
    model: Classifier to evaluate via its ``score`` method.

  Returns:
    The value returned by ``model.score``, which is also printed.
  """
  accuracy = model.score(X_test, y_test)
  print(f"Test accuracy: {accuracy}")
  return accuracy


# Creating ZenML Pipeline

Note that a pipeline is just a function that calls the `@step` functions defined above and passes each step's outputs to the next.

In [None]:
from zenml import pipeline

@pipeline
def digits_pipeline():
  """Wire the importer, trainer and evaluator steps into a single pipeline.

  ``importer`` produces the train/test splits, ``svc_trainer`` consumes the
  training split and yields a model, and ``evaluator`` scores that model on
  the test split.
  """
  X_train, X_test, y_train, y_test = importer()
  model = svc_trainer(X_train=X_train, y_train=y_train)
  evaluator(X_test=X_test, y_test=y_test, model=model)

# The pipeline is intentionally not invoked here: it is executed in the
# dedicated run cell that follows, and calling it in this cell as well would
# trigger a redundant second run.

**Running the ZenML Pipeline**

In [None]:
# Invoke the @pipeline-decorated function to run the pipeline.
# NOTE(review): despite the variable's name, the stored value is presumably
# the result of the run rather than a pipeline instance — confirm against the
# installed ZenML version.
digits_pipeline_instance = digits_pipeline()