In [0]:
%%bash
pip install langchain==0.0.244 datarobotx==0.1.14 datarobot-drum langchain-community databricks-vectorsearch databricks

Collecting databricks
  Downloading databricks-0.2-py2.py3-none-any.whl (1.2 kB)
Installing collected packages: databricks
Successfully installed databricks-0.2



[notice] A new release of pip available: 22.2.2 -> 24.0
[notice] To update, run: pip install --upgrade pip


In [0]:
def load_model(input_dir):
    """Custom model hook for loading our knowledge base."""
    import os

    import datarobot_drum as drum
    from databricks.vector_search.client import VectorSearchClient
    DBX_PAT = "YOUR_DBX_PAT"
    ENDPOINT_NAME = "YOUR_VSI_ENDPOINT_NAME"
    INDEX_NAME = "YOUR_VSI_NAME"

    vsc = VectorSearchClient(
        workspace_url="https://ohio.cloud.databricks.com", personal_access_token=DBX_PAT,
    )
    index = vsc.get_index(
        endpoint_name=ENDPOINT_NAME, index_name=INDEX_NAME
    )
    return index


def score_unstructured(model, data, query, **kwargs) -> str:
    """Custom model hook for making completions with our knowledge base.

    datarobot-user-models (DRUM) handles loading the model and calling
    this function with the appropriate parameters.
    """
    import json

    from langchain.chains import ConversationalRetrievalChain
    from langchain.chat_models import AzureChatOpenAI
    from langchain_community.vectorstores import DatabricksVectorSearch

    try:
        index = model
        data_dict = json.loads(data)

        dvs = DatabricksVectorSearch(index)
        documents = dvs.similarity_search(data_dict["question"])
        relevant_text_list = [doc.page_content for doc in documents]
        rv = {"relevant": relevant_text_list}
    except Exception as e:
        rv = {"error": f"{e.__class__.__name__}: {str(e)}"}
    return json.dumps(rv), {"mimetype": "application/json", "charset": "utf8"}

In [0]:
import datarobotx as drx

drx.Context(
    endpoint="https://app.datarobot.com/api/v2",
    token="YOUR_DR_API_TOKEN",
)

{'token': 'NjBmOTkzMWRkNzYyNTBlYTVhMGI0M2MzOnJWWS9kV3prZnBPUFd1Z09CUnoyN1VZQmVXZFdCbFU5RmVXN1hSaTFjRkk9', 'endpoint': 'https://app.datarobot.com/api/v2', 'pred_server_id': '', 'enable_api_consumer_tracking': True, 'trace_context': None}

In [0]:
deployment = drx.deploy(
    model=None,
    name="External DR KB with DBx",
    hooks={"score_unstructured": score_unstructured, "load_model": load_model},
    extra_requirements=[
        "langchain",
        "langchain-community",
        "databricks-vectorsearch",
        "databricks",
    ]
)

VBox()

VBox()

VBox()

In [0]:
deployment.predict_unstructured(
    {"question": "Why did you donate?",}
)

VBox()

{'relevant': ['I donated because I care :)',
  'I gave because I am so grateful to your organization for offering to match donations for my project, that I donated myself; to take advantage of the opportunity.  Thank you Donors Choose for being so generous and for doing good work. ',
  'I donated because I want to support kids in the \n',
  'I gave because I asked you to give']}