# Deploying the model in RHOAI single-model serving (via code)

In [None]:
%%bash

# If you need to delete the previously created objects and start over, uncomment these 2 lines:
# oc delete inferenceservices wildfire01
# oc delete servingruntime wildfire01

## Create the "ServingRuntime" object

In [None]:
%%bash

# Create (or update) the ServingRuntime named "wildfire01" from the inline
# manifest below. The manifest is fed to `oc apply` on stdin; the here-doc
# delimiter is quoted so the YAML (including the {{.Name}} template
# placeholder) reaches oc verbatim, without any shell expansion.
oc apply -f - <<'EOF'
---
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  annotations:
    opendatahub.io/accelerator-name: ''
    opendatahub.io/apiProtocol: REST
    opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
    opendatahub.io/template-display-name: OpenVINO Model Server
    opendatahub.io/template-name: kserve-ovms
    openshift.io/display-name: wildfire01
  name: wildfire01
  labels:
    opendatahub.io/dashboard: 'true'
spec:
  annotations:
    prometheus.io/path: /metrics
    prometheus.io/port: '8888'
  containers:
    - args:
        - '--model_name={{.Name}}'
        - '--port=8001'
        - '--rest_port=8888'
        - '--model_path=/mnt/models'
        - '--file_system_poll_wait_seconds=0'
        - '--grpc_bind_address=0.0.0.0'
        - '--rest_bind_address=0.0.0.0'
        - '--target_device=AUTO'
        - '--metrics_enable'
      image: 'quay.io/modh/openvino_model_server@sha256:f1140e9d987580d1aab1ccc62519b48b1d2673308b2db496e9e505e3be788d9f'
      name: kserve-container
      ports:
        - containerPort: 8888
          protocol: TCP
      volumeMounts:
        - mountPath: /dev/shm
          name: shm
  multiModel: false
  protocolVersions:
    - v2
    - grpc-v2
  supportedModelFormats:
    - autoSelect: true
      name: openvino_ir
      version: opset13
    - name: onnx
      version: '1'
    - autoSelect: true
      name: tensorflow
      version: '1'
    - autoSelect: true
      name: tensorflow
      version: '2'
    - autoSelect: true
      name: paddle
      version: '2'
    - autoSelect: true
      name: pytorch
      version: '2'
  volumes:
    - emptyDir:
        medium: Memory
        sizeLimit: 2Gi
      name: shm

EOF

## Create the "InferenceService" Object

In [None]:
%%bash

# Create (or update) the InferenceService named "wildfire01" from the inline
# manifest below. It selects the "wildfire01" runtime, declares an ONNX model
# and reads it from the "wildfire_onnx/" path of the
# "aws-connection-minio-wildfire" storage key. The here-doc delimiter is
# quoted so the YAML reaches oc verbatim, without any shell expansion.
oc apply -f - <<'EOF'
---
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  annotations:
    openshift.io/display-name: wildfire01
    serving.knative.openshift.io/enablePassthrough: 'true'
    sidecar.istio.io/inject: 'true'
    sidecar.istio.io/rewriteAppHTTPProbers: 'true'
  name: wildfire01
  finalizers:
    - inferenceservice.finalizers
  labels:
    # networking.knative.dev/visibility: cluster-local
    opendatahub.io/dashboard: 'true'
spec:
  predictor:
    maxReplicas: 1
    minReplicas: 1
    model:
      modelFormat:
        name: onnx
        version: '1'
      name: ''
      resources:
        limits:
          cpu: '1'
          memory: 1100Mi
        requests:
          cpu: 300m
          memory: 10Mi
      runtime: wildfire01
      storage:
        key: aws-connection-minio-wildfire
        path: wildfire_onnx/
EOF

## Confirm Objects have been created

In [None]:
%%bash

# List the ServingRuntime and InferenceService objects to confirm that both
# exist. No namespace is passed, so oc queries whichever project is currently
# selected.
oc get ServingRuntime,inferenceservice

## Wait for pod to become ready

In [None]:
%%bash

# Wait until every predictor pod of the wildfire01 InferenceService reports
# phase "Running".
#
# Tunables (environment variables):
#   WAIT_TIMEOUT_SECONDS   give up after this many seconds (default: 600)
#   POLL_INTERVAL_SECONDS  pause between two polls         (default: 2)
POD_SELECTOR='service.istio.io/canonical-name=wildfire01-predictor'
WAIT_TIMEOUT_SECONDS="${WAIT_TIMEOUT_SECONDS:-600}"
POLL_INTERVAL_SECONDS="${POLL_INTERVAL_SECONDS:-2}"

#######################################
# Poll the pods matching POD_SELECTOR until all of them are Running.
# Globals:   POD_SELECTOR, WAIT_TIMEOUT_SECONDS, POLL_INTERVAL_SECONDS (read)
# Arguments: none
# Outputs:   progress messages on stdout, failures on stderr
# Returns:   0 once at least one pod exists and every pod is Running,
#            1 if the oc query fails or the timeout expires
#######################################
wait_for_predictor_pods() {
  local phases summary total running deadline
  deadline=$(( SECONDS + WAIT_TIMEOUT_SECONDS ))

  while true; do
    # A single query per iteration: the counts and the reported status are
    # derived from the same snapshot. One phase is printed per line.
    if ! phases=$(oc get pods -l "${POD_SELECTOR}" \
      -o jsonpath='{range .items[*]}{.status.phase}{"\n"}{end}'); then
      echo "Failed to query pods with selector ${POD_SELECTOR}" >&2
      return 1
    fi

    # grep -c prints 0 (and exits non-zero) when nothing matches; only the
    # printed count matters here. -x avoids partial matches on "Running".
    total=$(grep -c . <<<"${phases}")
    running=$(grep -c -x 'Running' <<<"${phases}")

    # Require at least one pod: right after the InferenceService is applied
    # no pod exists yet, and "0 of 0 running" must not count as success.
    if (( total > 0 && running == total )); then
      echo "All pods are running"
      return 0
    fi

    if (( SECONDS >= deadline )); then
      echo "Timed out after ${WAIT_TIMEOUT_SECONDS}s waiting for pods to run" >&2
      return 1
    fi

    summary=${phases//$'\n'/ }
    echo "Not all pods are running. Status: ${summary:-no pods found yet}"
    sleep "${POLL_INTERVAL_SECONDS}"
  done
}

# Show the current pod list once before starting to poll.
oc get pods -l "${POD_SELECTOR}"

wait_for_predictor_pods
