In [57]:
#  submit jobs to API Databricks
# https://docs.databricks.com/api/workspace/jobs/create

In [34]:
import requests
import json
from dotenv import dotenv_values

In [35]:
config = dotenv_values(".env")

In [76]:
config.get("DATABRICKS_HOST")

'https://adb-1286930193882465.5.azuredatabricks.net'

In [95]:
workspace_url = config.get("DATABRICKS_HOST")
url = workspace_url + "/api/2.1/jobs/runs/submit"
#url = workspace_url + "/api/2.1/jobs/create"

In [92]:
token = config.get("DATABRICKS_TOKEN")

headers = {
    "Authorization": f"Bearer {token}",
    "Content-Type": "application/json"
}
#"existing_cluster_id" : "0531-123607-lblvj6fa",

# cluster on Demand

In [96]:
body = {
  "name" : "MyJob_run",
  "tasks" : [
    {
      "task_key" : "run_notebook_from_api",
        "new_cluster":{
                "spark_version": "15.1.x-cpu-ml-scala2.12",
    "spark_conf": {
        "spark.master": "local[*, 4]",
        "spark.databricks.cluster.profile": "singleNode"
    },
    "azure_attributes": {
        "first_on_demand": 1,
        "availability": "ON_DEMAND_AZURE",
        "spot_bid_max_price": -1
    },
    "node_type_id": "Standard_DS3_v2",
    "driver_node_type_id": "Standard_DS3_v2",
    "custom_tags": {
        "ResourceClass": "SingleNode"
    },

    "enable_elastic_disk": True,
    "single_user_name": "olonok@hotmail.com",
    "data_security_mode": "LEGACY_SINGLE_USER_STANDARD",
    "runtime_engine": "STANDARD",
    "num_workers": 0,
               
            },
      "notebook_task" : {
        "notebook_path" : "/Workspace/Users/olonok@hotmail.com/jobs-api",
        "source" : "WORKSPACE",
        "base_parameters": {
            "experiment_name": "xgboost_experiments_test_en",
  
            }
        }
    }
  ],
  "timeout_seconds" : 3600,
  "tags" : {
    "project" : "data_analytics",
    "process" : "extraction"
   } 
}

In [97]:
response = requests.post(url, headers = headers, data = json.dumps(body))

print(response.status_code)
print(response.text)

200
{"run_id":766733162915718}


# To an existing Cluster

In [98]:
body = {
  "name" : "MyJob_existing_cluster",
  "tasks" : [
    {
      "task_key" : "run_notebook_from_api",
        "existing_cluster_id" : "0603-211930-u6lf0uot",

      "notebook_task" : {
        "notebook_path" : "/Workspace/Users/olonok@hotmail.com/jobs-api",
        "source" : "WORKSPACE",
          "base_parameters": {
            "experiment_name": "xgboost_experiments_existing_cluster",
  
            }
        }
    }
  ],
  "timeout_seconds" : 3600,
  "tags" : {
    "project" : "data_analytics",
    "process" : "extraction"
   } 
}

In [99]:
response = requests.post(url, headers = headers, data = json.dumps(body))

print(response.status_code)
print(response.text)

200
{"run_id":850595848950128}


# To a pool

In [None]:
body = {
  "name" : "MyJob",
  "tasks" : [
    {
      "task_key" : "run_notebook_from_api",

        "new_cluster":{
                "num_workers": 1,
                "autoscale":{
                    "min_workers":1,
                    "max_workers":1
                },

                "spark_version": "15.1.x-cpu-ml-scala2.12",
                "spark_conf":{

                },
                "instance_pool_id": "0603-205900-times49-pool-v6dusypa"
            },
      "notebook_task" : {
        "notebook_path" : "/Workspace/Users/olonok@hotmail.com/jobs-api",
        "source" : "WORKSPACE",
          "base_parameters": {
            "experiment_name": "xgboost_experiments",
  
            }
        }
    }
  ],
  "timeout_seconds" : 3600,
  "tags" : {
    "project" : "data_analytics",
    "process" : "extraction"
   } 
}

# list jobs

In [100]:

urllist = workspace_url + "/api/2.1/jobs/list"
response = requests.get(urllist, headers = headers)

print(response.status_code)
print(response.text)

200
{"jobs":[{"job_id":743233108966656,"creator_user_name":"olonok@hotmail.com","settings":{"name":"MyJob","email_notifications":{},"timeout_seconds":3600,"max_concurrent_runs":1,"tags":{"process":"extraction","project":"data_analytics"},"format":"MULTI_TASK"},"created_time":1717492079616},{"job_id":433201300352403,"creator_user_name":"olonok@hotmail.com","settings":{"name":"New Job 2024-06-04 09:21:45","email_notifications":{"no_alert_for_skipped_runs":false},"timeout_seconds":0,"max_concurrent_runs":1,"format":"MULTI_TASK","queue":{"enabled":true}},"created_time":1717489297837}],"has_more":false}


In [65]:
# List runs

In [101]:

urllistrun = workspace_url + "/api/2.1/jobs/runs/list"
response = requests.get(urllistrun, headers = headers)

print(response.status_code)
print(response.text)

200
{"runs":[{"job_id":92750622646995,"run_id":850595848950128,"creator_user_name":"olonok@hotmail.com","number_in_job":850595848950128,"state":{"life_cycle_state":"RUNNING","state_message":"","user_cancelled_or_timedout":false},"start_time":1717492447012,"setup_duration":0,"execution_duration":0,"cleanup_duration":0,"end_time":0,"run_duration":313175,"run_name":"Untitled","run_page_url":"https://adb-1286930193882465.5.azuredatabricks.net/?o=1286930193882465#job/92750622646995/run/850595848950128","run_type":"SUBMIT_RUN","format":"MULTI_TASK"},{"job_id":867241885321951,"run_id":766733162915718,"creator_user_name":"olonok@hotmail.com","number_in_job":766733162915718,"state":{"life_cycle_state":"RUNNING","state_message":"","user_cancelled_or_timedout":false},"start_time":1717492308050,"setup_duration":0,"execution_duration":0,"cleanup_duration":0,"end_time":0,"run_duration":452138,"run_name":"Untitled","run_page_url":"https://adb-1286930193882465.5.azuredatabricks.net/?o=1286930193882465

# Cancel a Job Run

In [102]:
body = {
  "run_id": 850595848950128
}
urlcancelrun = workspace_url + "/api/2.1/jobs/runs/cancel"
response = requests.post(urlcancelrun, headers = headers, data = json.dumps(body))

print(response.status_code)
print(response.text)

200
{}


# Workspace 

In [103]:
urllist = workspace_url + "/api/2.0/workspace/list"
params={
"path": "/Users/olonok@hotmail.com/"
}
response = requests.get(urllist, headers = headers, params=params )

print(response.status_code)
print(response.text)

200
{"objects":[{"object_type":"NOTEBOOK","path":"/Users/olonok@hotmail.com/jobs-api","language":"PYTHON","created_at":1717159068335,"modified_at":1717482081431,"object_id":760923413360882,"resource_id":"760923413360882"},{"object_type":"MLFLOW_EXPERIMENT","path":"/Users/olonok@hotmail.com/pipelines","object_id":878341338033203,"resource_id":"878341338033203"},{"object_type":"NOTEBOOK","path":"/Users/olonok@hotmail.com/mlflow-end-to-end","language":"PYTHON","created_at":1717447044835,"modified_at":1717447044888,"object_id":1899686197831447,"resource_id":"1899686197831447"},{"object_type":"MLFLOW_EXPERIMENT","path":"/Users/olonok@hotmail.com/multistep_workflow","object_id":1910808740223562,"resource_id":"1910808740223562"},{"object_type":"NOTEBOOK","path":"/Users/olonok@hotmail.com/MLflow with Optuna: Hyperparameter Optimization and Tracking","language":"PYTHON","created_at":1717344828119,"modified_at":1717395402270,"object_id":2546891085146716,"resource_id":"2546891085146716"},{"object

# Machine Learning

In [104]:
urlrun_id = workspace_url + "/api/2.0/mlflow/runs/get"
params={
"run_id": "0e3c41c688d841f39c83ccd0095d5954"
}
response = requests.get(urlrun_id, headers = headers, params=params )

print(response.status_code)
print(response.text)

200
{"run":{"info":{"run_id":"0e3c41c688d841f39c83ccd0095d5954","run_uuid":"0e3c41c688d841f39c83ccd0095d5954","experiment_id":"2546891085146744","run_name":"charming-hog-782","status":"FINISHED","start_time":1717353627765,"end_time":1717353628754,"artifact_uri":"dbfs:/databricks/mlflow-tracking/2546891085146744/0e3c41c688d841f39c83ccd0095d5954/artifacts","lifecycle_stage":"active"},"data":{"metrics":[{"key":"mse","value":34331.82597229768,"timestamp":1717353628366,"step":0},{"key":"rmse","value":185.28849390153098,"timestamp":1717353628551,"step":0}],"params":[{"key":"alpha","value":"5.0206024979717066e-08"},{"key":"booster","value":"gbtree"},{"key":"eta","value":"0.6943485418169614"},{"key":"eval_metric","value":"rmse"},{"key":"gamma","value":"0.00019463076719002743"},{"key":"grow_policy","value":"depthwise"},{"key":"lambda","value":"1.238357756908523e-06"},{"key":"max_depth","value":"5"},{"key":"objective","value":"reg:squarederror"}],"tags":[{"key":"mlflow.databricks.cluster.id","va

# Create cluster

In [105]:
# ccreate cluster
body = {
  "cluster_name": "single-node-cluster",
  "spark_version": "14.3.x-scala2.12",
  "node_type_id":"Standard_DS3_v2",
  "num_workers": 0,
  "spark_conf": {
    "spark.databricks.cluster.profile": "singleNode",
    "spark.master": "[*, 4]"
  },
  "custom_tags": {
    "ResourceClass": "SingleNode"
  },

}


urlcreate_cluster = workspace_url + "/api/2.0/clusters/create"
response = requests.post(urlcreate_cluster, headers = headers, data = json.dumps(body))

print(response.status_code)
print(response.text)

200
{"cluster_id":"0604-092256-b5lijbdg"}
