In [0]:
%pip install mlflow==2.10.1 langchain databricks-vectorsearch==0.22 databricks-sdk==0.18.0 mlflow[databricks]
dbutils.library.restartPython()

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
%run ./00-init $reset_all_data=false

In [0]:
# Three-level name of the Vector Search index queried by the retriever.
# `catalog` and `db` are not defined in this notebook - presumably set by the init notebook run above; TODO confirm.
index_name = "{}.{}.vector_search_index".format(catalog, db)

# Base URL of the current workspace, built from the Spark configuration.
workspace_url = spark.conf.get("spark.databricks.workspaceUrl")
host = "https://" + workspace_url

In [0]:
# Export the access token as an environment variable: get_retriever() reads
# DATABRICKS_TOKEN to authenticate the Vector Search client.
# The value is read from a secret scope so the token never appears in the notebook.
# (`host` is already defined in the previous cell, so it is not recomputed here.)
import os
os.environ["DATABRICKS_TOKEN"] = dbutils.secrets.get("databricksscope", "databricks-token")

In [0]:
from databricks.vector_search.client import VectorSearchClient
from langchain_community.vectorstores import DatabricksVectorSearch
from langchain_community.embeddings import DatabricksEmbeddings

# Smoke-test the LangChain embedding wrapper.
# NOTE: questions must be embedded with the same model that was used to embed the indexed chunks.
embedding_model = DatabricksEmbeddings(endpoint="databricks-bge-large-en")
sample_embedding = embedding_model.embed_query("What is Apache Spark?")
# Only the first 20 dimensions are printed to keep the output short.
print("Test embeddings: {}...".format(sample_embedding[:20]))

def get_retriever(persist_dir: str = None):
    """Build a LangChain retriever on top of the Databricks Vector Search index.

    Side effect: sets the ``DATABRICKS_HOST`` environment variable to ``host``.

    Args:
        persist_dir: Not used by this function. It is passed as ``loader_fn`` to
            ``mlflow.langchain.log_model`` later in this notebook; presumably the
            parameter exists to satisfy that callback signature - TODO confirm.

    Returns:
        The object returned by ``DatabricksVectorSearch.as_retriever()``.

    Raises:
        KeyError: If ``DATABRICKS_TOKEN`` is missing from the environment.
    """
    os.environ["DATABRICKS_HOST"] = host

    # Look up the existing index using the token exported in the environment.
    token = os.environ["DATABRICKS_TOKEN"]
    client = VectorSearchClient(workspace_url=host, personal_access_token=token)
    index = client.get_index(endpoint_name=VECTOR_SEARCH_ENDPOINT_NAME, index_name=index_name)

    # Wrap the index as a LangChain vector store; "content" is the text column it returns.
    store = DatabricksVectorSearch(index, text_column="content", embedding=embedding_model)
    return store.as_retriever()

# Smoke-test the retriever.
# Despite its name, `vectorstore` holds the retriever returned by get_retriever().
vectorstore = get_retriever()
# invoke() is the Runnable entry point and replaces the deprecated get_relevant_documents().
similar_documents = vectorstore.invoke("What policies does Woodgrove Financial have in place to ensure workplace safety?")
if similar_documents:
    print(f"Relevant documents: {similar_documents[0]}")
else:
    # An empty result would otherwise surface as an opaque IndexError on [0].
    print("Relevant documents: none returned - check that the vector search index is populated")

Test embeddings: [0.0185699462890625, -0.01403045654296875, -0.057647705078125, 0.003448486328125, 0.008575439453125, -0.0216827392578125, -0.0247344970703125, -0.0047149658203125, 0.0136260986328125, 0.050323486328125, -0.027496337890625, -0.0147247314453125, 0.05474853515625, -0.053802490234375, -0.01025390625, -0.0161895751953125, -0.018768310546875, -0.017181396484375, -0.051177978515625, 0.0178680419921875]...
[NOTICE] Using a Personal Authentication Token (PAT). Recommended for development only. For improved performance, please use Service Principal based authentication. To disable this message, pass disable_notice=True to VectorSearchClient().


  warn_deprecated(


Relevant documents: page_content='Introduction\nWoodgrove Financial provides industry-leading benefits to help you and your family get and stay well, prepare for your future, and enjoy life’s journey. Whether you are expecting a new child, looking for some legal advice for a new home, or managing a health condition, Woodgrove Financial is here to support you with benefits and resources to help you live life well.\nThis Summary Plan Description (SPD) provides details of the health and welfare benefits available to eligible employees and their eligible dependents, as described in this SPD. Other summary plan descriptions address health and welfare benefits that may be offered to other employees and their eligible dependents.\nAbout the SPD\nThis document is intended to serve as a Summary Plan Description (SPD) as defined by the Employee Retirement Income Security Act of 1974 (ERISA) for such programs described within that are governed by ERISA. The terms and conditions of the Woodgrove F

In [0]:
# Test Databricks Foundation LLM model
from langchain_community.chat_models import ChatDatabricks
chat_model = ChatDatabricks(endpoint="databricks-meta-llama-3-1-405b-instruct", max_tokens=2000)  # azure-openai-gpt4-model
# invoke() replaces the deprecated predict(); it returns a message object whose
# `.content` is the generated text that predict() used to return directly.
# No context is supplied here, so the model answers from its own knowledge only.
test_response = chat_model.invoke('What policies does Woodgrove Financial have in place to ensure workplace safety?')
print(f"Test chat model: {test_response.content}")

  warn_deprecated(


Test chat model: I couldn't find any information on a company called Woodgrove Financial. It's possible that it's a fictional company or a private company that doesn't publicly disclose its policies.

However, I can provide general information on common policies that companies may have in place to ensure workplace safety. These can include:

1. Occupational Health and Safety (OHS) policies: Many companies have a written OHS policy that outlines their commitment to providing a safe work environment and the procedures for reporting and investigating incidents.
2. Hazard identification and risk assessment: Companies may have procedures in place to identify potential hazards in the workplace and assess the risks associated with them.
3. Training and education: Companies may provide regular training and education to employees on workplace safety procedures, emergency response plans, and the use of personal protective equipment (PPE).
4. Incident reporting and investigation: Companies may ha

### Building the chain

In [0]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatDatabricks

# Prompt sent to the LLM. `{context}` receives the retrieved documents and
# `{question}` the user query. The earlier wording contained a truncated sentence
# ("You are answering  If you don't know..."); the dangling fragment is removed so
# the instructions read as complete sentences.
TEMPLATE = """You are an assistant for Woodgrove Financial users. If you don't know the answer, just say that you don't know, don't try to make up an answer. Keep the answer as concise as possible.
Use the following pieces of context to answer the question at the end:
{context}
Question: {question}
Answer:
"""
prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])

# "stuff" chain type: the retrieved documents are inserted into the single prompt above.
chain = RetrievalQA.from_chain_type(
    llm=chat_model,
    chain_type="stuff",
    retriever=get_retriever(),
    chain_type_kwargs={"prompt": prompt}
)

[NOTICE] Using a Personal Authentication Token (PAT). Recommended for development only. For improved performance, please use Service Principal based authentication. To disable this message, pass disable_notice=True to VectorSearchClient().




In [0]:
# langchain.debug = True #uncomment to see the chain details and the full prompt being sent
question = {"query": "What documentation or proof is required when enrolling my domestic partner, and how does the tax treatment differ from covering a spouse?"}
# invoke() replaces the deprecated run() and matches how the reloaded model is
# called further down. It returns a dict; the generated text is under "result",
# so `answer` stays a plain string for infer_signature() below.
answer = chain.invoke(question)["result"]
print(answer)


  warn_deprecated(


You may be required to provide evidence of your partnership in connection with a plan audit of dependent eligibility or a claim for benefits. You may also sign the Woodgrove Financial Affidavit of Domestic Partnership before a notary and retain the affidavit in your records.

The tax treatment differs from covering a spouse because domestic partners generally do not qualify as spouses or dependents for federal income tax purposes. Therefore, the value of company-provided medical, dental, and vision coverage for your domestic partner will be considered imputed income and will be taxable to you.


### Log the model to the model registry

In [0]:
from mlflow.models import infer_signature
import mlflow
import langchain

from importlib.metadata import version as installed_version

# Register models in Unity Catalog under a three-level name.
mlflow.set_registry_uri("databricks-uc")
model_name = f"{catalog}.{db}.rag_chatbot_model_v01"

with mlflow.start_run(run_name="chatbot_rag") as run:
    # The signature is inferred from the example question/answer produced above.
    signature = infer_signature(question, answer)
    model_info = mlflow.langchain.log_model(
        chain,
        loader_fn=get_retriever,  # Load the retriever with DATABRICKS_TOKEN env as secret (for authentication).
        artifact_path="chain",
        registered_model_name=model_name,
        # Pin every dependency to the version installed in this notebook so the
        # logged model is rebuilt with the same libraries it was tested against.
        pip_requirements=[
            "mlflow==" + mlflow.__version__,
            "langchain==" + langchain.__version__,
            "databricks-vectorsearch==" + installed_version("databricks-vectorsearch"),
        ],
        input_example=question,
        signature=signature
    )

 - mlflow (current: 2.10.1, required: mlflow==2.14.3)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


Uploading artifacts:   0%|          | 0/11 [00:00<?, ?it/s]

Successfully registered model 'cronos_unity_catalog.cdata.rag_chatbot_model_v02'.


Uploading artifacts:   0%|          | 0/11 [00:00<?, ?it/s]

Created version '1' of model 'cronos_unity_catalog.cdata.rag_chatbot_model_v02'.


In [0]:
# Sanity check: reload the chain that was just logged and ask it the same question.
model = mlflow.langchain.load_model(model_uri=model_info.model_uri)
model.invoke(input=question)

Downloading artifacts:   0%|          | 0/11 [00:00<?, ?it/s]



[NOTICE] Using a Personal Authentication Token (PAT). Recommended for development only. For improved performance, please use Service Principal based authentication. To disable this message, pass disable_notice=True to VectorSearchClient().


  warn_deprecated(


{'query': 'What documentation or proof is required when enrolling my domestic partner, and how does the tax treatment differ from covering a spouse?',
 'result': 'You may be required to provide evidence of your partnership in connection with a plan audit of dependent eligibility or a claim for benefits. You may also sign the Woodgrove Financial Affidavit of Domestic Partnership before a notary and retain the affidavit in your records.\n\nThe tax treatment differs from covering a spouse in that domestic partners generally do not qualify as spouses or dependents for federal income tax purposes. Therefore, the value of company-provided medical, dental, and vision coverage for your domestic partner will be considered imputed income and will be taxable to you.'}

### Deploying our Chat Model as a Serverless Model Endpoint


In [0]:
import mlflow
from mlflow.tracking import MlflowClient
from databricks.sdk.service.serving import EndpointCoreConfigInput, ServedModelInput, ServedModelInputWorkloadSize

In [0]:
mlflow.set_registry_uri('databricks-uc')
model_name = f"{catalog}.{db}.rag_chatbot_model_v01"
serving_endpoint_name = "rag-chatbot-model-endpoint-v01"

# Deploy the newest registered version rather than a hard-coded 1, so re-running
# the logging cell does not leave the endpoint on a stale version.
_registered_versions = [
    int(mv.version)  # version numbers are compared as integers, not as strings
    for mv in MlflowClient().search_model_versions(f"name='{model_name}'")
]
if not _registered_versions:
    raise ValueError(f"No registered versions found for model '{model_name}'; log the model first.")
latest_model_version = max(_registered_versions)

In [0]:
# Create or update serving endpoint
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import EndpointCoreConfigInput, ServedModelInput, ServedModelInputWorkloadSize

# The single model version served by the endpoint.
served_chatbot = ServedModelInput(
    model_name=model_name,
    model_version=latest_model_version,
    workload_size=ServedModelInputWorkloadSize.LARGE,
    scale_to_zero_enabled=True,
    # The token is passed as a {{secrets/<scope>/<key>}} reference rather than a literal value.
    # NOTE(review): an earlier note here said this token looked expired - verify the secret before deploying.
    environment_vars={"DATABRICKS_TOKEN": "{{secrets/databricksscope/databricks-token}}"},
)

# Endpoint configuration used by the create/update cell below.
endpoint_config = EndpointCoreConfigInput(
    name=serving_endpoint_name,
    served_models=[served_chatbot],
)

In [0]:
from databricks.sdk import WorkspaceClient
w = WorkspaceClient()

# Find an endpoint with the target name, or None if it has not been created yet.
existing_endpoint = next(
    (e for e in w.serving_endpoints.list() if e.name == serving_endpoint_name), None
)
serving_endpoint_url = f"{host}/ml/endpoints/{serving_endpoint_name}"
# Compare with None by identity, not equality.
if existing_endpoint is None:
    print(f"Creating the endpoint {serving_endpoint_url}, this will take a few minutes to package and deploy the endpoint...")
    w.serving_endpoints.create_and_wait(name=serving_endpoint_name, config=endpoint_config)
else:
    # The endpoint already exists: swap in the served model from the new configuration.
    print(f"Updating the endpoint {serving_endpoint_url} to version {latest_model_version}, this will take a few minutes to package and deploy the endpoint...")
    w.serving_endpoints.update_config_and_wait(served_models=endpoint_config.served_models, name=serving_endpoint_name)

Creating the endpoint https://adb-835232524225733.13.azuredatabricks.net/ml/endpoints/rag-chatbot-model-endpoint-v3, this will take a few minutes to package and deploy the endpoint...
