# Batch Jobs

## Train model

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import joblib
import numpy as np

# Load the Iris dataset and keep its feature / class labels at hand.
dataset = load_iris()
feature_names = dataset.feature_names
class_names = list(dataset.target_names)

# Feature matrix and target vector.
X, y = dataset.data, dataset.target

# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the classifier on the training split (fit() returns the estimator itself).
lr = LogisticRegression(max_iter=4000).fit(X_train, y_train)

# Persist the fitted model as model.joblib in the working directory.
joblib.dump(lr, 'model.joblib')

## Copy model to MinIO

We will now copy our model to MinIO installed in the K8s cluster running Seldon. If you have MinIO installed, open up a separate terminal and port-forward to MinIO with the following command:

```kubectl port-forward -n minio-system svc/minio 8090:9000```

Add MinIO host and push the trained model artefact:

In [None]:
!mc config host add minio http://localhost:8090 minioadmin minioadmin

In [None]:
# Copy the serialized model into data/ under the `minio` alias.
!mc cp model.joblib minio/data/

## Create deployment 

Authenticate to your cluster at `SD_IP` with your `username` and `password`: 

In [None]:
from seldon_deploy_sdk import Configuration, ApiClient, SeldonDeploymentsApi, BatchJobsApi, BatchDefinition
from seldon_deploy_sdk.auth import OIDCAuthenticator
import requests

# Connection details for the Seldon Deploy cluster. These are empty
# placeholders here: SD_IP is interpolated into the URLs below, and
# username / password are passed to the authenticator in auth().
SD_IP = ""
username = ""
password = ""

# SDK configuration: base URL of the Seldon Deploy API.
config = Configuration()
config.host = f"http://{SD_IP}/seldon-deploy/api/v1alpha1"

# OIDC client id / secret and server (realm) URL set on the configuration.
# NOTE(review): the client secret is hardcoded in the notebook; consider
# loading it from the environment instead.
config.oidc_client_id = "sd-api"
config.oidc_client_secret = "sd-api-secret"
config.oidc_server = f"http://{SD_IP}/auth/realms/deploy-realm"

def auth():
    """Log in via OIDC and return an authenticated API client.

    Authenticates with the module-level ``username`` / ``password`` using
    the shared ``config``, stores the resulting token on
    ``config.access_token``, and wraps the configuration in an ``ApiClient``.
    """
    authenticator = OIDCAuthenticator(config)
    config.access_token = authenticator.authenticate(username, password)
    return ApiClient(config)

Create and launch the deployment: 

In [None]:
# Name used for the SeldonDeployment and when addressing it for batch jobs.
DEPLOYMENT_NAME = "iris-sample"
# Location of the model artefact: model.joblib under s3://data/.
MODEL_LOCATION = "s3://data/model.joblib"

In [None]:
# Target namespace and the server implementation referenced by the graph.
NAMESPACE = "test"
PREPACKAGED_SERVER = "SKLEARN_SERVER"

# Resource requests and limits applied to the model container.
CPU_REQUESTS = "1"
MEMORY_REQUESTS = "1Gi"

CPU_LIMITS = "1"
MEMORY_LIMITS = "1Gi"

# The container entry and the graph node share this name.
container_name = f"{DEPLOYMENT_NAME}-container"

container_resources = {
    "requests": {"cpu": CPU_REQUESTS, "memory": MEMORY_REQUESTS},
    "limits": {"cpu": CPU_LIMITS, "memory": MEMORY_LIMITS},
}

# Single-node inference graph: one model, no children, logging all messages.
inference_graph = {
    "implementation": PREPACKAGED_SERVER,
    "modelUri": MODEL_LOCATION,
    "envSecretRefName": "seldon-job-secret",
    "name": container_name,
    "endpoint": {"type": "REST"},
    "parameters": [],
    "children": [],
    "logger": {"mode": "all"},
}

# The only predictor: one replica receiving all traffic.
default_predictor = {
    "componentSpecs": [
        {
            "spec": {
                "containers": [
                    {"name": container_name, "resources": container_resources}
                ]
            }
        }
    ],
    "name": "default",
    "replicas": 1,
    "traffic": 100,
    "graph": inference_graph,
}

# Full SeldonDeployment manifest assembled from the pieces above.
mldeployment = {
    "kind": "SeldonDeployment",
    "metadata": {
        "name": DEPLOYMENT_NAME,
        "namespace": NAMESPACE,
        "labels": {"fluentd": "true"},
    },
    "apiVersion": "machinelearning.seldon.io/v1alpha2",
    "spec": {
        "name": DEPLOYMENT_NAME,
        "annotations": {
            "seldon.io/engine-seldon-log-messages-externally": "true"
        },
        "protocol": "seldon",
        "transport": "rest",
        "predictors": [default_predictor],
    },
    "status": {},
}

In [None]:
# Authenticate, then submit the deployment manifest to the target namespace.
deployment_api = SeldonDeploymentsApi(auth())
deployment_api.create_seldon_deployment(namespace=NAMESPACE, mldeployment=mldeployment)

## Batch job

We can inspect the different input parameters required to configure a batch job: 

In [None]:
# Display a default-constructed BatchDefinition as the cell output.
BatchDefinition()

Define batch workflow:

In [None]:
# Batch job settings submitted with the job below. Numeric-looking values
# (retries, workers) are given as strings here.
WORKFLOW = {
    "batchDataType": "data",
    "batchMethod": "predict",
    "batchRetries": "3",
    "batchTransportProtocol": "rest",
    "batchWorkers": "15",
    # NOTE(review): this notebook does not show input-data.txt being uploaded;
    # presumably it must already exist at this location — confirm.
    "inputData": "s3://data/input-data.txt",
    "objectStoreSecretName": "seldon-job-secret",
    # NOTE(review): {{workflow.name}} looks like a template placeholder that is
    # filled in per job — confirm.
    "outputData": "s3://data/output-data-{{workflow.name}}.txt"
}

Launch batch job:

In [None]:
# Authenticate again and submit the batch job against the named deployment.
batch_api = BatchJobsApi(auth())
batch_api.create_seldon_deployment_batch_job(name=DEPLOYMENT_NAME, namespace=NAMESPACE, workflow=WORKFLOW)

We can check on our batch job to see whether it has completed. Make sure to set `JOB_NAME` below to the job's `workflowName`; it is passed as the `job_name` argument:

In [None]:
# Placeholder: replace with the workflowName of the launched batch job.
JOB_NAME='workflowName'

# Authenticate again and look up the batch job for this deployment by name.
batch_api = BatchJobsApi(auth())
batch_api.get_deployment_batch_job(name=DEPLOYMENT_NAME, namespace=NAMESPACE, job_name=JOB_NAME)

Check results of batch in MinIO:

In [None]:
!mc cat minio/data/output-data-<JOB_NAME>.txt