# Deploying models to production
Use the `mlops_workshop_deployment` Python environment for this task. If it's not yet installed, ask!


Edit `bonus/wine-model.yaml`: in particular, name the model and set the
S3 path, which you can get either from MLflow or by browsing the buckets in MinIO at `localhost:9001` (ask the instructors for the username/password).

In [None]:
import requests
import time

In [None]:
# Prerequisite: edit bonus/wine-model.yaml first - add the model's S3 path and name the model.
# Then apply the manifest to the cluster.
!kubectl apply -f bonus/wine-model.yaml

inferenceservice.serving.kserve.io/wine-model-workshop created


In [None]:
# 'wine-model-workshop' is the InferenceService name from the manifest.
# Let's check that it's running now:
!kubectl get isvc wine-model-workshop -n kserve-inference
# Watch variant (same command with -w; uncomment to use it instead):
#!kubectl get isvc wine-model-workshop -n kserve-inference -w

In [None]:
# Check the pods that belong to this InferenceService (selected by label).
!kubectl get pods -n kserve-inference -l serving.kserve.io/inferenceservice=wine-model-workshop

NAME                                                              READY   STATUS        RESTARTS   AGE
wine-model-workshop-predictor-00001-deployment-8585d85bbf-v7695   1/2     Terminating   0          9m52s


In [None]:
# Check the pods again: re-run this cell to follow the pod's STATUS as it starts up.
!kubectl get pods -n kserve-inference -l serving.kserve.io/inferenceservice=wine-model-workshop

NAME                                                              READY   STATUS     RESTARTS   AGE
wine-model-workshop-predictor-00001-deployment-6fb494cc94-8cggg   0/2     Init:0/1   0          8s


In [None]:
# Check the pods once more: the predictor pod should end up READY 2/2 with STATUS Running.
!kubectl get pods -n kserve-inference -l serving.kserve.io/inferenceservice=wine-model-workshop

NAME                                                              READY   STATUS    RESTARTS   AGE
wine-model-workshop-predictor-00001-deployment-6fb494cc94-8cggg   2/2     Running   0          28s


In [None]:
# 'wine-model-workshop' is the InferenceService name from the manifest.
# Check it again now that the pod is up: READY should be True and a URL should be listed.
!kubectl get isvc wine-model-workshop -n kserve-inference
# Watch variant (same command with -w; uncomment to use it instead):
#!kubectl get isvc wine-model-workshop -n kserve-inference -w

NAME                  URL                                                       READY   PREV   LATEST   PREVROLLEDOUTREVISION   LATESTREADYREVISION                   AGE
wine-model-workshop   http://wine-model-workshop.kserve-inference.example.com   True           100                              wine-model-workshop-predictor-00001   32s


In [None]:
# Request setup: the sample input and the endpoint details used when sending requests.

# Must match the model name given in the manifest.
model_name = "wine-model-workshop"

# One hard-coded sample with 11 feature values.
# TODO import data from data folder
single_input = [7.8, 0.58, 0.02, 2, 0.073, 9, 18, 0.9968, 3.36, 0.57, 9.5]

# Requests are sent to the gateway address; the Host header carries the
# service hostname derived from the model name.
url = f"http://kserve-gateway.local:30200/v1/models/{model_name}:predict"
headers = {"Host": f"{model_name}.kserve-inference.example.com"}

def send_requests(count=30, input_length=1, interval=0.5, timeout=10):
    """
    Send a series of prediction requests to the wine-quality inference service.

    Each request POSTs ``input_length`` copies of ``single_input`` to ``url``
    using the notebook-level ``headers``, then pauses ``interval`` seconds.
    The responses are not inspected; nothing is returned.

    Args:
        count: Number of requests to send.
        input_length: Number of inputs (data points) in each request.
        interval: Seconds to sleep after each request (default 0.5).
        timeout: Seconds to wait for the service on each request. Without a
            timeout, `requests` waits indefinitely, so an unreachable gateway
            would hang the cell.

    Raises:
        requests.exceptions.RequestException: if a request cannot be
            completed (e.g. connection error or timeout).
    """
    # The payload is the same for every request, so build it once.
    req_data = {"instances": [single_input] * input_length}
    for _ in range(count):
        requests.post(url, json=req_data, headers=headers, timeout=timeout)
        time.sleep(interval)

In [None]:
# Send 3 requests, one every 0.5s; each request has one input (i.e., data point)
send_requests(count=3, input_length=1)

In [None]:
# Do a single POST with one input and print the JSON response.
# "instances" holds the list of inputs; here it contains just `single_input`.
req_data={"instances": [single_input]}
response = requests.post(url, json=req_data, headers=headers)
print(response.json())

{'predictions': [5.561975350332427]}


In [None]:
# Load the training data from data/2_data_train.csv and preview the first rows.
# NOTE(review): imports are normally collected in the first code cell - consider moving this one there.
import pandas as pd
data = pd.read_csv("data/2_data_train.csv")
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,11.9,0.38,0.51,2.0,0.121,7.0,20.0,0.9996,3.24,0.76,10.4,6
1,7.5,0.725,0.04,1.5,0.076,8.0,15.0,0.99508,3.26,0.53,9.6,5
2,11.5,0.3,0.6,2.0,0.067,12.0,27.0,0.9981,3.11,0.97,10.1,6
3,8.3,0.65,0.1,2.9,0.089,17.0,40.0,0.99803,3.29,0.55,9.5,5
4,6.4,0.53,0.09,3.9,0.123,14.0,31.0,0.9968,3.5,0.67,11.0,4


In [None]:
# Send the first 10 rows as one batched request, dropping the last column
# (`quality`) so that only the feature columns are sent.
inputs = data.iloc[:10, :-1].values.tolist()
req_data={"instances": inputs}
response = requests.post(url, json=req_data, headers=headers)
print(response.json())

{'predictions': [5.540902974823476, 5.57115344010752, 5.5344410810071505, 5.498407770896474, 5.52594204022175, 5.53680682405682, 5.500948740400692, 5.507585860671566, 5.354274530453766, 5.128414614307973]}


In [None]:
# Counter-example: send the first 10 rows with ALL columns (the last column is
# NOT dropped this time). The service is expected to answer with an error,
# because the input then has one column more than the model accepts.
# Correct slicing, for comparison:
#inputs = data.iloc[:10, :-1].values.tolist()
inputs = data.iloc[:10].values.tolist()
req_data={"instances": inputs}
response = requests.post(url, json=req_data, headers=headers)
print(response.json())

{'error': 'X has 12 features, but ElasticNet is expecting 11 features as input.'}
