In [0]:
%pip install openai mlflow 
dbutils.library.restartPython()

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


# Three ways to call Databricks Mosaic AI model serving endpoints
* 1. Using OpenAI sdk to call model serving endpoint
* 2. Using Databricks sdk to call model serving endpoint
* 3. Using rest api to call model serving endpoint

In [0]:
catalog = "prasad_kona_isv"
schema = "demo"
model_name = "langgraph-tool-calling-agent"
UC_MODEL_NAME = f"{catalog}.{schema}.{model_name}"

In [0]:
from openai import OpenAI
import os


# In a Databricks notebook you can use this:
DATABRICKS_HOSTNAME = dbutils.notebook.entry_point.getDbutils().notebook().getContext().browserHostName().get()

# How to get your Databricks token: https://docs.databricks.com/en/dev-tools/auth/pat.html
#DATABRICKS_TOKEN = os.environ.get('DATABRICKS_TOKEN')
# Alternatively in a Databricks notebook you can use this:
DATABRICKS_TOKEN = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()

serving_endpoint_name ="agents_prasad_kona_isv-demo-langgraph-tool-calling-agent"

client = OpenAI(
  api_key=DATABRICKS_TOKEN,
  base_url=f"https://{DATABRICKS_HOSTNAME}/serving-endpoints"
)

chat_completion = client.chat.completions.create(
  messages=[
  {
    "role": "system",
    "content": "You are an AI assistant"
  },
  {
    "role": "user",
    "content": "Tell me about Large Language Models in one sentence"
  }
  ],
  model= serving_endpoint_name,
  max_tokens=256
)

print(chat_completion.choices[0].message.content) if chat_completion and chat_completion.choices else print(chat_completion)



ChatCompletion(id='e126e4c9-b63a-4ad9-ac5b-6adea89bfe69', choices=None, created=None, model=None, object=None, service_tier=None, system_fingerprint=None, usage=None, messages=[{'role': 'assistant', 'content': 'Large Language Models are a type of artificial intelligence designed to process and understand human language, generating coherent and contextually relevant text based on the input they receive, with applications in areas such as chatbots, language translation, and text summarization.', 'id': 'run--f1b007b7-3cdc-45c5-a796-aa099f376dcb-0'}], databricks_output={'databricks_request_id': 'e126e4c9-b63a-4ad9-ac5b-6adea89bfe69'})


In [0]:
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import DataframeSplitInput

# In a Databricks notebook you can use this:
#databricks_hostname = dbutils.notebook.entry_point.getDbutils().notebook().getContext().browserHostName()
#databricks_token=dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()

serving_endpoint_name ="agents_prasad_kona_isv-demo-langgraph-tool-calling-agent"

#endpoint_url = f"https://{databricks_hostname}/serving-endpoints/{serving_endpoint_name}/invocations"


w = WorkspaceClient()

test_dialog = DataframeSplitInput(
    columns=["messages"],
    data=[
        {
            "messages": [
                {"role": "user", "content": "How does billing work on Databricks? Answer in one sentence"},
            ]
        }
    ],
)
answer = w.serving_endpoints.query(serving_endpoint_name, dataframe_split=test_dialog)
print(answer.predictions)

{'messages': [{'role': 'assistant', 'content': 'Databricks billing is based on Databricks Units (DBUs), which are a measure of the compute resources used by an organization, with costs varying depending on the cloud provider, instance type, and usage.', 'id': 'run--c5ab5c60-dbb2-41b8-8bd0-5518ee58cac9-0'}], 'id': '18e2ed1e-f36d-42ea-b83a-d2dfedb6f7d4'}


In [0]:
import json
import requests

# In a Databricks notebook you can use this:
DATABRICKS_TOKEN=dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()
DATABRICKS_HOSTNAME = dbutils.notebook.entry_point.getDbutils().notebook().getContext().browserHostName().get()
serving_endpoint_name ="agents_prasad_kona_isv-demo-langgraph-tool-calling-agent"

endpoint_url = f"https://{DATABRICKS_HOSTNAME}/serving-endpoints/{serving_endpoint_name}/invocations"

input_example = {
    "messages": [
        {
            "role": "user",
            "content": "How does billing work on Databricks? Answer in one sentence",
        }
    ]
}

def call_model_serving_endpoint(payload):
    url = endpoint_url
    headers = {'Authorization': f'Bearer {DATABRICKS_TOKEN}', 'Content-Type': 'application/json'}
    
    data_json = json.dumps(payload, allow_nan=True)
    response = requests.request(method='POST', headers=headers, url=url, data=data_json)
    return response.json()

response_data = call_model_serving_endpoint(input_example)
display(response_data)

{'messages': [{'role': 'assistant',
   'content': 'Databricks billing is based on Databricks Units (DBUs), which are consumed by running clusters, jobs, and other workloads, with costs calculated according to the type and size of the cluster, as well as the duration of usage.',
   'id': 'run--3e87b7c5-6303-43db-a2e6-2ae410134644-0'}],
 'id': 'f5335637-fc86-4716-8687-1830f170d685',
 'databricks_output': {'databricks_request_id': 'f5335637-fc86-4716-8687-1830f170d685'}}