# Baseten custom pipeline use case

Let's assume that we want to upload the following pipeline to be served by baseten. Some differences with the standard use case are:
- It is a scikit-learn pipeline instead of a simple scikit-learn model
- Basides 3rd party libraries that could be install via a `requirements.txt` file, it has also local dependencies

In [1]:
!pip install truss==0.1.0 baseten==0.2.5

Collecting truss==0.1.0
  Using cached truss-0.1.0-py3-none-any.whl (104 kB)
Collecting baseten==0.2.5
  Downloading baseten-0.2.5-py3-none-any.whl (21 kB)
Collecting single-source<0.4.0,>=0.3.0
  Using cached single_source-0.3.0-py3-none-any.whl (5.7 kB)
Collecting packaging<21.0,>=20.9
  Using cached packaging-20.9-py2.py3-none-any.whl (40 kB)
Installing collected packages: single-source, packaging, truss, baseten
  Attempting uninstall: packaging
    Found existing installation: packaging 21.3
    Uninstalling packaging-21.3:
      Successfully uninstalled packaging-21.3
  Attempting uninstall: truss
    Found existing installation: truss 0.0.30
    Uninstalling truss-0.0.30:
      Successfully uninstalled truss-0.0.30
  Attempting uninstall: baseten
    Found existing installation: baseten 0.2.1
    Uninstalling baseten-0.2.1:
      Successfully uninstalled baseten-0.2.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 

In [1]:
import numpy as np
import sys
sys.path.insert(0, './pipelines/lr_model/')
sys.path.insert(0, './models/')
from joblib import dump
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from pipelines.lr_model.steps import (Disqualify, FeatureCalculator)

LEARNED_INTERCEPT = -1
LEARNED_COEFFICIENTS = [2, 1, 0, 0, 1]
lr_model = LogisticRegression()
lr_model.classes_ = np.array([0, 1])
lr_model.intercept_ = LEARNED_INTERCEPT
lr_model.coef_ = np.array([LEARNED_COEFFICIENTS])

pipeline = Pipeline(
    steps=[
        ("step1", Disqualify()),
        (
            "step2",
            FeatureCalculator(check_qualified=True),
        ),
        ("model", lr_model),
    ]
)

from bson.objectid import ObjectId

SID0 = "abcdef123456abcdef123453"
SID1 = "abcdef123456abcdef123453"
SID2 = "abcdef123456abcdef123454"
SID3 = "abcdef123456abcdef123454"
SID4 = "abcdef123456abcdef543210"

BUILDER = {
    "feature1": SID0,
    "feature2": SID1,
    "feature3": SID4,
    "feature4": SID2,
    "feature5": SID1,
}

ROLE = {
    "feature1": SID0,
    "feature2": SID1,
    "feature3": SID1,
    "feature4": SID3,
    "feature5": SID4,
}

pipeline.predict_proba([(x, ROLE) for x in [BUILDER]])

# somehow I should upload this pipeline and the necessary local dependencies to baseten

[({'feature1': 'abcdef123456abcdef123453', 'feature2': 'abcdef123456abcdef123453', 'feature3': 'abcdef123456abcdef543210', 'feature4': 'abcdef123456abcdef123454', 'feature5': 'abcdef123456abcdef123453'}, {'feature1': 'abcdef123456abcdef123453', 'feature2': 'abcdef123456abcdef123453', 'feature3': 'abcdef123456abcdef123453', 'feature4': 'abcdef123456abcdef123454', 'feature5': 'abcdef123456abcdef543210'})]


array([[0.11920292, 0.88079708]])

## Get local dependencies

In [3]:
!pip install importlab

You should consider upgrading via the '/Users/kyrcha/.pyenv/versions/ateam/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

In [6]:
!importlab --tree train.py | grep -E '^\s+[^::]*py$'

  warn("The `IPython.kernel` package has been deprecated since IPython 4.0."
  parent = __import__(parent_name, fromlist=['__path__'])
      pipelines/lr_model/steps.py
          models/role.py
          models/builder.py


## Package Model

In [2]:
import truss
handle = truss.mk_truss(
    pipeline, 
    target_directory="./pipeline_truss/", 
    requirements_file="./requirements.txt", 
    bundled_packages=["./models/", "./pipelines/"]
)

You should consider upgrading via the '/Users/kyrcha/.pyenv/versions/ateam/bin/python -m pip install --upgrade pip' command.


## Test Locally

In [3]:
handle.server_predict({'inputs': list([(x, ROLE) for x in [BUILDER]])})

[({'feature1': 'abcdef123456abcdef123453', 'feature2': 'abcdef123456abcdef123453', 'feature3': 'abcdef123456abcdef543210', 'feature4': 'abcdef123456abcdef123454', 'feature5': 'abcdef123456abcdef123453'}, {'feature1': 'abcdef123456abcdef123453', 'feature2': 'abcdef123456abcdef123453', 'feature3': 'abcdef123456abcdef123453', 'feature4': 'abcdef123456abcdef123454', 'feature5': 'abcdef123456abcdef543210'})]
[({'feature1': 'abcdef123456abcdef123453', 'feature2': 'abcdef123456abcdef123453', 'feature3': 'abcdef123456abcdef543210', 'feature4': 'abcdef123456abcdef123454', 'feature5': 'abcdef123456abcdef123453'}, {'feature1': 'abcdef123456abcdef123453', 'feature2': 'abcdef123456abcdef123453', 'feature3': 'abcdef123456abcdef123453', 'feature4': 'abcdef123456abcdef123454', 'feature5': 'abcdef123456abcdef543210'})]


{'predictions': array([1]),
 'probabilities': [[0.11920292202211769, 0.8807970779778823]]}

In [4]:
handle.docker_predict({'inputs': list([(x, ROLE) for x in [BUILDER]])})

#1 [internal] load build definition from Dockerfile
#1 sha256:4a4086b1def6fe79679a902e5a14cb9afa967f84f9ad673d3f48e37b497b5009
#1 transferring dockerfile:
#1 transferring dockerfile: 794B 0.0s done
#1 DONE 0.1s

#2 [internal] load .dockerignore
#2 sha256:edae4cf662d133399923f2a04daefa3aa6a78b2478851d7f5b1187e92b1b96ce
#2 transferring context: 2B done
#2 DONE 0.0s

#3 [internal] load metadata for docker.io/baseten/baseten-server-base-py38:latest
#3 sha256:c7d5235b139eaab6125abf344c196a614320ccef1d728d462d536fdfd2443e6e
#3 DONE 2.6s

#18 [ 1/14] FROM docker.io/baseten/baseten-server-base-py38:latest@sha256:eadb2d14fa0549be9739fd6aa9bc8a9a01bb09d9cb0c58e7445b24af81fc1207
#18 sha256:7e324b8d752114abe7454450669b255822e9a5a6a81d2f757658f3770b413068
#18 DONE 0.0s

#4 [internal] load build context
#4 sha256:ca9ee72d0c64fa4b68ab775941d7d5bbe35711c4d8fecbda6bbae6f2f9782ea8
#4 transferring context: 115B done
#4 ERROR: error from sender: resolve : lstat /private/var/folders/01/ngz274cj2hz8_ng6hrt6

KeyboardInterrupt: 

## Deploy to Baseten

In [5]:
import baseten
baseten.login("")

[32mINFO[0m API key set.


In [6]:
b10_model = baseten.deploy_truss(handle, model_name='test-truss-old')

[32mINFO[0m Serializing [34mtest-truss-old[0m truss.
[32mINFO[0m Making contact with BaseTen üëã üëΩ
[32mINFO[0m üöÄ Uploading model to BaseTen üöÄ
Upload Progress: 100% |[34m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[39m| 7.27k/7.27k
[32mINFO[0m üîÆ Upload successful!üîÆ
[32mINFO[0m eyJtb2RlbF90eXBlIjogIk1vZGVsIiwgIm1vZGVsX2ZyYW1ld29yayI6ICJza2xlYXJuIiwgIm1vZGVsX21vZHVsZV9kaXIiOiAibW9kZWwiLCAibW9kZWxfY2xhc3NfZmlsZW5hbWUiOiAibW9kZWwucHkiLCAibW9kZWxfY2xhc3NfbmFtZSI6ICJNb2RlbCIsICJkYXRhX2RpciI6ICJkYXRhIiwgImlucHV0X3R5cGUiOiAiQW55IiwgIm1vZGVsX21ldGFkYXRhIjogeyJtb2RlbF9iaW5hcnlfZGlyIjogIm1vZGVsIiwgInN1cHBvcnRzX3ByZWRpY3RfcHJvYmEiOiB0cnVlfSwgInJlcXVpcmVtZW50cyI6IFsiYXBwbm9wZT09MC4xLjMiLCAiYXN0dG9rZW5zPT0yLjAuNSIsICJiY

In [9]:
import json
from bson import ObjectId

class JSONEncoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, ObjectId):
            return str(o)
        return json.JSONEncoder.default(self, o)

request = {'role': ROLE, 'builder': BUILDER}
# serial_req = JSONEncoder().encode(request)

b10_model.predict([request], prediction_only=False)

ApiError: Error calling predict.
<Server response: b'{"error": "Failed to invoke model version zq8odg3. \\nTraceback (most recent call last):\\n  File \\"/app/model_wrapper.py\\", line 55, in predict\\n    return self._model.predict(request)\\n  File \\"/app/model/model.py\\", line 44, in predict\\n    result = self._model.predict(inputs)\\n  File \\"/usr/local/lib/python3.8/site-packages/sklearn/pipeline.py\\", line 457, in predict\\n    Xt = transform.transform(Xt)\\n  File \\"/packages/pipelines/lr_model/steps.py\\", line 15, in transform\\n    return [self._disqualify(*x) for x in X]\\n  File \\"/packages/pipelines/lr_model/steps.py\\", line 15, in <listcomp>\\n    return [self._disqualify(*x) for x in X]\\n  File \\"/packages/pipelines/lr_model/steps.py\\", line 22, in _disqualify\\n    disqualified = Disqualify.check_if_disqualified(\\n  File \\"/packages/pipelines/lr_model/steps.py\\", line 36, in check_if_disqualified\\n    if builder.get(\\"feature1\\") is None:\\nAttributeError: \'str\' object has no attribute \'get\'\\n"}'>

In [11]:
!curl -X POST https://app.baseten.co/model_versions/zq8odg3/predict -H 'Authorization: Api-Key ' -d '{"inputs": [{"role": {"feature1": "abcdef123456abcdef123453", "feature2": "abcdef123456abcdef123453", "feature3": "abcdef123456abcdef123453", "feature4": "abcdef123456abcdef123454", "feature5": "abcdef123456abcdef543210"}, "builder": {"feature1": "abcdef123456abcdef123453", "feature2": "abcdef123456abcdef123453", "feature3": "abcdef123456abcdef543210", "feature4": "abcdef123456abcdef123454", "feature5": "abcdef123456abcdef123453"}}]}'

{"error": "Failed to invoke model version zq8odg3. \nTraceback (most recent call last):\n  File \"/app/model_wrapper.py\", line 55, in predict\n    return self._model.predict(request)\n  File \"/app/model/model.py\", line 44, in predict\n    result = self._model.predict(inputs)\n  File \"/usr/local/lib/python3.8/site-packages/sklearn/pipeline.py\", line 457, in predict\n    Xt = transform.transform(Xt)\n  File \"/packages/pipelines/lr_model/steps.py\", line 15, in transform\n    return [self._disqualify(*x) for x in X]\n  File \"/packages/pipelines/lr_model/steps.py\", line 15, in <listcomp>\n    return [self._disqualify(*x) for x in X]\n  File \"/packages/pipelines/lr_model/steps.py\", line 22, in _disqualify\n    disqualified = Disqualify.check_if_disqualified(\n  File \"/packages/pipelines/lr_model/steps.py\", line 36, in check_if_disqualified\n    if builder.get(\"feature1\") is None:\nAttributeError: 'str' object has no attribute 'get'\n"}