In [0]:
import time

import requests
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.catalog import (
    OnlineTable,
    OnlineTableSpec,
    OnlineTableSpecTriggeredSchedulingPolicy,
)
from databricks.sdk.service.serving import EndpointCoreConfigInput, ServedEntityInput
# from pyspark.sql import SparkSession
from databricks.connect import DatabricksSession

from utils import load_config

In [0]:
spark = DatabricksSession.builder.getOrCreate()
workspace = WorkspaceClient()

In [0]:
config = load_config("../project_config.yml")
print(config)

[32m2025-03-28 21:04:24.091[0m | [1mINFO    [0m | [36mutils[0m:[36mload_config[0m:[36m66[0m - [1mLoaded configuration from ../project_config.yml[0m


catalog_name='test01' schema_name='default' pipeline_id='4be66e88-11e8-4988-9fa3-459b9b28a83c' parameters={'learning_rate': 0.05, 'random_state': 42, 'force_col_wise': True} ab_test={'learning_rate_a': 0.05, 'learning_rate_b': 0.1, 'force_col_wise': True} num_features=[NumFeature(name='ID', dtype='int64'), NumFeature(name='LIMIT_BAL', dtype='float64'), NumFeature(name='SEX', dtype='int64'), NumFeature(name='EDUCATION', dtype='int64'), NumFeature(name='MARRIAGE', dtype='int64'), NumFeature(name='AGE', dtype='int64'), NumFeature(name='PAY_0', dtype='int64'), NumFeature(name='PAY_2', dtype='int64'), NumFeature(name='PAY_3', dtype='int64'), NumFeature(name='PAY_4', dtype='int64'), NumFeature(name='PAY_5', dtype='int64'), NumFeature(name='PAY_6', dtype='int64'), NumFeature(name='BILL_AMT1', dtype='float64'), NumFeature(name='BILL_AMT2', dtype='float64'), NumFeature(name='BILL_AMT3', dtype='float64'), NumFeature(name='BILL_AMT4', dtype='float64'), NumFeature(name='BILL_AMT5', dtype='float64'

In [0]:
catalog_name = config.catalog_name
schema_name = config.schema_name

In [0]:
spec = OnlineTableSpec(
    primary_key_columns=["Id"],
    source_table_full_name=f"{catalog_name}.{schema_name}.features_balanced",
    run_triggered=OnlineTableSpecTriggeredSchedulingPolicy.from_dict({"triggered": "true"}),
    perform_full_copy=False,
)

# Create the online table in Databricks
online_table_name = f"{catalog_name}.{schema_name}.features_balanced_online"

table = OnlineTable(name=online_table_name, spec=spec)
# print(online_table_name.as_dict())

In [0]:
workspace.online_tables.create(
    name=online_table_name,
    spec=spec
)

OnlineTable(name='test01.default.features_balanced_online', spec=OnlineTableSpec(perform_full_copy=False, pipeline_id='a7026c5c-3f4d-44b2-8fab-35f472d9505c', primary_key_columns=['Id'], run_continuously=None, run_triggered=OnlineTableSpecTriggeredSchedulingPolicy(), source_table_full_name='test01.default.features_balanced', timeseries_key=None), status=OnlineTableStatus(continuous_update_status=None, detailed_state=<OnlineTableState.PROVISIONING: 'PROVISIONING'>, failed_status=None, message='Online Table creation started.', provisioning_status=None, triggered_update_status=None), table_serving_url=None)

In [0]:
print(workspace.online_tables.get(online_table_name))

OnlineTable(name='test01.default.features_balanced_online', spec=OnlineTableSpec(perform_full_copy=False, pipeline_id='a7026c5c-3f4d-44b2-8fab-35f472d9505c', primary_key_columns=['Id'], run_continuously=None, run_triggered=OnlineTableSpecTriggeredSchedulingPolicy(), source_table_full_name='test01.default.features_balanced', timeseries_key=None), status=OnlineTableStatus(continuous_update_status=None, detailed_state=<OnlineTableState.PROVISIONING_PIPELINE_RESOURCES: 'PROVISIONING_PIPELINE_RESOURCES'>, failed_status=None, message='Online Table is currently pending setup of pipeline resources. Check latest status in Delta Live Tables: https://adb-4478913909061743.3.azuredatabricks.net#joblist/pipelines/a7026c5c-3f4d-44b2-8fab-35f472d9505c/updates/ac9455b4-24f4-41b8-af94-5eb6c8ea101b', provisioning_status=ProvisioningStatus(initial_pipeline_sync_progress=PipelineProgress(estimated_completion_time_seconds=None, latest_version_currently_processing=None, sync_progress_completion=None, synced_

In [0]:
# Pipeline_id to be added into the project_config.yml
print(workspace.online_tables.get(online_table_name).spec.pipeline_id)

a7026c5c-3f4d-44b2-8fab-35f472d9505c


In [0]:
workspace.serving_endpoints.create(
    name="credit-default-model-serving-feature",
    config=EndpointCoreConfigInput(
        served_entities=[
            ServedEntityInput(
                entity_name=f"{catalog_name}.{schema_name}.credit_model_feature",
                scale_to_zero_enabled=True,
                workload_size="Small",
                entity_version=1,
            )
        ]
    ),
)

<databricks.sdk.service._internal.Wait at 0x7f080cb23b50>

In [0]:
token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()  # noqa: F821

# token = dbutils.secrets.get(scope="secret-scope", key="DATABRICKS_TOKEN")  # noqa: F821

host = spark.conf.get("spark.databricks.workspaceUrl")

In [0]:
host

'adb-4478913909061743.3.azuredatabricks.net'

In [0]:
required_columns = ["Id"]

In [0]:
required_columns

['Id']

In [0]:
train_set = spark.table(f"{catalog_name}.{schema_name}.train_set").toPandas()

sampled_records = train_set[required_columns].sample(n=1000, replace=True).to_dict(orient="records")
dataframe_records = [[record] for record in sampled_records]

In [0]:
print(train_set.dtypes)

Id                              object
Limit_bal                      float64
Sex                              int32
Education                        int32
Marriage                         int32
Age                              int32
Pay_0                            int32
Pay_2                            int32
Pay_3                            int32
Pay_4                            int32
Pay_5                            int32
Pay_6                            int32
Bill_amt1                      float64
Bill_amt2                      float64
Bill_amt3                      float64
Bill_amt4                      float64
Bill_amt5                      float64
Bill_amt6                      float64
Pay_amt1                       float64
Pay_amt2                       float64
Pay_amt3                       float64
Pay_amt4                       float64
Pay_amt5                       float64
Pay_amt6                       float64
Default                          int32
Update_timestamp_utc    d

In [0]:
dataframe_records[0]

[{'Id': '23253'}]

In [0]:
import time
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

def wait_for_endpoint(endpoint_name, max_wait_sec=300, check_interval_sec=10):
    start_time = time.time()
    while time.time() - start_time < max_wait_sec:
        status = w.serving_endpoints.get(name=endpoint_name).state.ready
        if status:
            print(f"Endpoint {endpoint_name} is ready!")
            return True
        print(f"Endpoint {endpoint_name} not ready yet. Waiting {check_interval_sec} seconds...")
        time.sleep(check_interval_sec)
    print(f"Endpoint {endpoint_name} did not become ready within {max_wait_sec} seconds")
    return False

wait_for_endpoint("credit-default-model-serving-feature")

Endpoint credit-default-model-serving-feature is ready!


True

In [0]:
host

'adb-4478913909061743.3.azuredatabricks.net'

In [0]:
start_time = time.time()

# model_serving_endpoint = f"https://{host}/serving-endpoints/credit-default-model-serving-feature/invocations"
model_serving_endpoint = f"https://{host}/serving-endpoints/credit-default-model-serving-feature/invocations"

response = requests.post(
    f"{model_serving_endpoint}",
    headers={"Authorization": f"Bearer {token}"},
    json={"dataframe_records": dataframe_records[0]},
)

end_time = time.time()
execution_time = end_time - start_time

print("Response status:", response.status_code)
print("Reponse text:", response.text)
print("Execution time:", execution_time, "seconds")

Response status: 200
Reponse text: {"predictions": [1]}
Execution time: 0.7516846656799316 seconds


In [0]:
credit_features = spark.table(f"{catalog_name}.{schema_name}.features_balanced").toPandas()

print(credit_features.dtypes)

Id            object
Limit_bal    float64
Sex          float64
Education    float64
Marriage     float64
Age          float64
Pay_0        float64
Pay_2        float64
Pay_3        float64
Pay_4        float64
Pay_5        float64
Pay_6        float64
Bill_amt1    float64
Bill_amt2    float64
Bill_amt3    float64
Bill_amt4    float64
Bill_amt5    float64
Bill_amt6    float64
Pay_amt1     float64
Pay_amt2     float64
Pay_amt3     float64
Pay_amt4     float64
Pay_amt5     float64
Pay_amt6     float64
dtype: object


In [0]:
from concurrent.futures import ThreadPoolExecutor, as_completed
import random

## Load Test

# Initialize variables
# model_serving_endpoint = f"https://{host}/serving-endpoints/credit-default-model-serving/invocations"
model_serving_endpoint = f"https://{host}/serving-endpoints/credit-default-model-serving-feature/invocations"

headers = {"Authorization": f"Bearer {token}"}
num_requests = 1000


# Function to make a request and record latency
def send_request():
    random_record = random.choice(dataframe_records)
    start_time = time.time()
    response = requests.post(
        model_serving_endpoint,
        headers=headers,
        json={"dataframe_records": random_record},
    )
    end_time = time.time()
    latency = end_time - start_time
    return response.status_code, latency


total_start_time = time.time()
latencies = []

# Send requests concurrently
with ThreadPoolExecutor(max_workers=100) as executor:
    futures = [executor.submit(send_request) for _ in range(num_requests)]

    for future in as_completed(futures):
        status_code, latency = future.result()
        latencies.append(latency)

total_end_time = time.time()
total_execution_time = total_end_time - total_start_time

# Calculate the average latency
average_latency = sum(latencies) / len(latencies)

print("\nTotal execution time:", total_execution_time, "seconds")
print("Average latency per request:", average_latency, "seconds")


Total execution time: 38.22215461730957 seconds
Average latency per request: 3.498054855585098 seconds
