In [4]:
import time
from datetime import datetime, timezone
import uuid
from databricks.sdk import WorkspaceClient
from databricks.sdk.service import ml

# --- 1) Create an experiment ---
exp = w.experiments.create_experiment(name="/Users/feldmanngreg@gmail.com/my-experiment55")
exp_id = exp.experiment_id
print(f"Experiment: {exp_id}")

# --- 2) Create a run with optional tags ---
run = w.experiments.create_run(
    experiment_id=exp_id,
    run_name="hello-sdk-MOOO_MOO_MOO"
)
run_id = run.run.info.run_id
print(f"Created run: {run_id}")

# --- 3) Log params and metrics in a single batch ---
now_ms = int(time.time() * 1000)
w.experiments.log_batch(
    run_id=run_id,
    params=[
        ml.Param(key="framework", value="sklearn"),
        ml.Param(key="n_estimators", value="200")
    ],
    metrics=[
        ml.Metric(key="accuracy", value=0.95, timestamp=now_ms, step=0)
    ]
)
print("Logged params/metrics.")

# --- 4) Create a logged model tied to that run ---
resp = w.experiments.create_logged_model(
    experiment_id=exp_id,
    source_run_id=run_id,
    model_type="Classifier",
    name="hello-sdk-model",
    params=[
        ml.LoggedModelParameter(key="framework", value="sklearn"),
        ml.LoggedModelParameter(key="n_estimators", value="200"),
    ],
    tags=[
        ml.LoggedModelTag(key="owner", value="feldmanngreg@gmail.com"),
        ml.LoggedModelTag(key="env", value="dev"),
    ]
)
print("CreateLoggedModelResponse:", resp)

# --- 5) Read the run back (includes latest metrics/params/tags) ---
got = w.experiments.get_run(run_id=run_id)
print("Params:", {p.key: p.value for p in got.run.data.params})
print("Metrics:", {m.key: m.value for m in got.run.data.metrics})
print("Tags:", {t.key: t.value for t in got.run.data.tags})

# --- 6) Cleanup (optional) ---
# w.experiments.delete_run(run_id=run_id)
# w.experiments.delete_experiment(experiment_id=exp_id)


ResourceAlreadyExists: Node named 'my-experiment55' already exists

In [5]:
# List catalogs
for cat in w.catalogs.list():
    print("CAT:", cat.name)

CAT: main
CAT: samples
CAT: system
CAT: workspace


In [6]:
# List schemas in a catalog
for sch in w.schemas.list(catalog_name="workspace"):
    print("SCH:", sch.full_name)

SCH: workspace.default
SCH: workspace.information_schema


In [7]:
# List tables in a schema
for t in w.tables.list(catalog_name="workspace", schema_name="default"):
    print("TBL:", t.full_name)  # e.g. samples.nyctaxi.trips


TBL: workspace.default.aapl_market_price


In [8]:
# Inspect a specific table
t = w.tables.get("workspace.default.aapl_market_price")
print(t.table_type, t.data_source_format)

TableType.MANAGED DataSourceFormat.DELTA


In [9]:
# You already have this:
t = w.tables.get("workspace.default.aapl_market_price")
print(t.full_name, t.table_type, t.data_source_format)  # sanity check

# Pick a SQL Warehouse (or set warehouse_id = "...your warehouse id...")
warehouse = next(iter(w.warehouses.list()))
warehouse_id = warehouse.id
print(f"Using warehouse: {warehouse.name} ({warehouse_id})")

# Build a query (fully qualified name from t.full_name)
sql = f"""
SELECT CAST(date AS DATE) AS date, adj_close
FROM {t.full_name}
ORDER BY date
"""

# Execute via Statement Execution API
stmt = w.statement_execution.execute_statement(
    statement=sql,
    warehouse_id=warehouse_id,
    # Optional context (not required since we used full_name):
    catalog=t.catalog_name,
    schema=t.schema_name
)

workspace.default.aapl_market_price TableType.MANAGED DataSourceFormat.DELTA
Using warehouse: Serverless Starter Warehouse (fe0c99db2c996651)


In [5]:
import os

os.environ["DATABRICKS_CONFIG_PROFILE"] = "C:/Users/feldm"

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.sql import ExecuteStatementRequestOnWaitTimeout
from databricks.sdk.service import ml

import time
from datetime import datetime, timezone
import uuid

import pandas as pd

w = WorkspaceClient(profile="DEFAULT", config_file="C:/Users/feldm/.databrickscfg")

# --- Pick a warehouse (or set warehouse_id = "...") ---
warehouse = next(iter(w.warehouses.list()))
warehouse_id = warehouse.id
print(f"Using warehouse: {warehouse.name} ({warehouse_id}); state={warehouse.state}")

# --- Get the table and build SQL ---
t = w.tables.get("workspace.default.aapl_market_price")

warehouses = w.warehouses.list()
for warehouse in warehouses:
    print(f"Name: {warehouse.name}, ID: {warehouse.id}")

response = w.statement_execution.execute_statement(
    statement=f"SELECT * FROM {t.catalog_name}.{t.schema_name}.aapl_market_price",
    warehouse_id=warehouse.id,
    wait_timeout="30s",  # Wait up to 30 seconds
    on_wait_timeout=ExecuteStatementRequestOnWaitTimeout.CANCEL  # Cancel if timeout
)
if response.status.state == "SUCCEEDED":
    print("Results:", response.result.data)  # Access the data

column_names = [col.name for col in response.manifest.schema.columns]
df = pd.DataFrame(response.result.data_array, columns=column_names)
df.head()

Using warehouse: Serverless Starter Warehouse (fe0c99db2c996651); state=State.RUNNING
Name: Serverless Starter Warehouse, ID: fe0c99db2c996651
