In [57]:
# Submit jobs through the Databricks REST API
# https://docs.databricks.com/api/workspace/jobs/create

In [133]:
import requests
import json
from dotenv import dotenv_values
import pprint

In [35]:
# Parse the local .env file into a mapping of setting name -> value.
config = dotenv_values(dotenv_path=".env")

In [106]:
# Echo the DATABRICKS_HOST value held in config (a missing key shows None).
# NOTE(review): the rendered output exposes the workspace URL; consider
# clearing it before sharing the notebook.
config.get("DATABRICKS_HOST")

'https://adb-1286930193882465.5.azuredatabricks.net'

In [107]:
# Pull the workspace address and the access token out of the loaded config.
workspace_url, token = (
    config.get(setting) for setting in ("DATABRICKS_HOST", "DATABRICKS_TOKEN")
)

# Headers sent with every call: bearer-token auth and a JSON content type.
headers = {
    "Authorization": "Bearer {}".format(token),
    "Content-Type": "application/json",
}


# Create cluster

In [110]:
# Create a single-node cluster through the Clusters API (2.0).
urlcreate_cluster = workspace_url + "/api/2.0/clusters/create"

body = {
    "cluster_name": "single-node-cluster",
    "spark_version": "14.3.x-scala2.12",
    "node_type_id": "Standard_DS3_v2",
    # Zero workers together with the singleNode profile requests a
    # single-node cluster.
    "num_workers": 0,
    "spark_conf": {
        "spark.databricks.cluster.profile": "singleNode",
        # The master URL needs the "local" scheme; a bare "[*, 4]" is not a
        # valid Spark master. This now matches the job-run cluster spec used
        # later in this notebook.
        "spark.master": "local[*, 4]",
    },
    "custom_tags": {
        "ResourceClass": "SingleNode",
    },
}

response = requests.post(urlcreate_cluster, headers=headers, data=json.dumps(body))

# Show the HTTP status and the raw reply (the reply carries the cluster_id).
print(response.status_code)
print(response.text)

200
{"cluster_id":"0604-101731-n1bv91jk"}


# Submit Job run with creation of new cluster

In [111]:
# Echo the DATABRICKS_HOST value held in config (a missing key shows None).
config.get("DATABRICKS_HOST")

'https://adb-1286930193882465.5.azuredatabricks.net'

In [157]:
# Pull the workspace address and the access token out of the loaded config.
workspace_url, token = (
    config.get(setting) for setting in ("DATABRICKS_HOST", "DATABRICKS_TOKEN")
)

# Headers sent with the request: bearer-token auth and a JSON content type.
headers = {
    "Authorization": "Bearer {}".format(token),
    "Content-Type": "application/json",
}

# Endpoint of the Jobs API 2.1 "runs/submit" call.
url = workspace_url + "/api/2.1/jobs/runs/submit"


In [158]:
# Payload for runs/submit: one notebook task that brings its own
# single-node cluster. Key order matches the JSON that gets serialized.
body = dict(
    name="MyJob_run",
    tasks=[
        dict(
            task_key="run_notebook_from_api",
            new_cluster=dict(
                spark_version="15.1.x-cpu-ml-scala2.12",
                # Dotted Spark keys cannot be keyword arguments, hence the literal.
                spark_conf={
                    "spark.master": "local[*, 4]",
                    "spark.databricks.cluster.profile": "singleNode",
                },
                azure_attributes=dict(
                    first_on_demand=1,
                    availability="ON_DEMAND_AZURE",
                    spot_bid_max_price=-1,
                ),
                node_type_id="Standard_DS3_v2",
                driver_node_type_id="Standard_DS3_v2",
                custom_tags=dict(ResourceClass="SingleNode"),
                enable_elastic_disk=True,
                single_user_name="olonok@hotmail.com",
                data_security_mode="LEGACY_SINGLE_USER_STANDARD",
                runtime_engine="STANDARD",
                num_workers=0,
            ),
            notebook_task=dict(
                notebook_path="/Workspace/Users/olonok@hotmail.com/jobs-api",
                source="WORKSPACE",
                # Values handed to the notebook as parameters.
                base_parameters=dict(experiment_name="xgboost_experiments_test_en"),
            ),
        )
    ],
    timeout_seconds=3600,
    tags=dict(project="data_analytics", process="extraction"),
)

In [159]:
# POST the JSON-encoded body to `url`, then show the HTTP status followed by
# the raw reply text.
response = requests.post(url, data=json.dumps(body), headers=headers)

print(response.status_code, response.text, sep="\n")

200
{"run_id":640968121528131}


# Create a Job and link it to an existing Cluster

In [119]:
# Echo the DATABRICKS_HOST value held in config (a missing key shows None).
config.get("DATABRICKS_HOST")

'https://adb-1286930193882465.5.azuredatabricks.net'

In [150]:
# Pull the workspace address and the access token out of the loaded config.
workspace_url, token = (
    config.get(setting) for setting in ("DATABRICKS_HOST", "DATABRICKS_TOKEN")
)

# Headers sent with the request: bearer-token auth and a JSON content type.
headers = {
    "Authorization": "Bearer {}".format(token),
    "Content-Type": "application/json",
}

# Target the Jobs API 2.1 "jobs/create" endpoint here, not the "runs/submit"
# endpoint used earlier in this notebook.
url = workspace_url + "/api/2.1/jobs/create"

In [151]:
# Job definition with a single notebook task bound to an existing cluster id.
# Key order matches the JSON that gets serialized.
body = dict(
    name="New_Job_existing_cluster_2",
    tasks=[
        dict(
            task_key="run_notebook_from_api",
            existing_cluster_id="0603-211930-u6lf0uot",
            notebook_task=dict(
                notebook_path="/Workspace/Users/olonok@hotmail.com/jobs-api",
                source="WORKSPACE",
                # Values handed to the notebook as parameters.
                base_parameters=dict(
                    experiment_name="xgboost_experiments_existing_cluster",
                ),
            ),
        )
    ],
    timeout_seconds=3600,
    tags=dict(project="data_analytics", process="extraction"),
)

In [152]:
# POST the JSON-encoded job definition to `url`.
response = requests.post(url, data=json.dumps(body), headers=headers)

# Status code first, then the reply text pretty-printed (shown as a quoted
# string because it is the raw text, not parsed JSON).
print(response.status_code)
pprint.pprint(response.text)

200
'{"job_id":562279998675533}'


# Job whose cluster is drawn from an instance pool

In [None]:
# Job definition whose task cluster takes its nodes from an instance pool.
# NOTE(review): this cell only builds the payload; nothing is sent here.
body = {
    "name": "MyJob",
    "tasks": [
        {
            "task_key": "run_notebook_from_api",
            "new_cluster": {
                # Fixed-size cluster with one worker. `num_workers` and
                # `autoscale` are alternative ways to size a cluster, so only
                # one is given (an autoscale range of 1..1 would be the same
                # size anyway).
                "num_workers": 1,
                "spark_version": "15.1.x-cpu-ml-scala2.12",
                "spark_conf": {},
                "instance_pool_id": "0603-205900-times49-pool-v6dusypa",
            },
            "notebook_task": {
                "notebook_path": "/Workspace/Users/olonok@hotmail.com/jobs-api",
                "source": "WORKSPACE",
                # Values handed to the notebook as parameters.
                "base_parameters": {
                    "experiment_name": "xgboost_experiments",
                },
            },
        }
    ],
    "timeout_seconds": 3600,
    "tags": {
        "project": "data_analytics",
        "process": "extraction",
    },
}

# List jobs in your workspace

In [138]:
# Echo the DATABRICKS_HOST value held in config (a missing key shows None).
config.get("DATABRICKS_HOST")

'https://adb-1286930193882465.5.azuredatabricks.net'

In [139]:
# Pull the workspace address and the access token out of the loaded config.
workspace_url, token = (
    config.get(setting) for setting in ("DATABRICKS_HOST", "DATABRICKS_TOKEN")
)

# Headers sent with the request: bearer-token auth and a JSON content type.
headers = {
    "Authorization": "Bearer {}".format(token),
    "Content-Type": "application/json",
}

In [142]:

# Ask the Jobs API 2.1 "jobs/list" endpoint for the workspace's jobs.
urllist = workspace_url + "/api/2.1/jobs/list"
response = requests.get(url=urllist, headers=headers)

# HTTP status, then the decoded JSON reply pretty-printed.
print(response.status_code)
pprint.pprint(response.json())

200
{'has_more': False,
 'jobs': [{'created_time': 1717508072708,
           'creator_user_name': 'olonok@hotmail.com',
           'job_id': 864744060298442,
           'settings': {'email_notifications': {},
                        'format': 'MULTI_TASK',
                        'max_concurrent_runs': 1,
                        'name': 'New_Job_existing_cluster_2',
                        'tags': {'process': 'extraction',
                                 'project': 'data_analytics'},
                        'timeout_seconds': 3600}},
          {'created_time': 1717506959101,
           'creator_user_name': 'olonok@hotmail.com',
           'job_id': 621160275574138,
           'settings': {'email_notifications': {},
                        'format': 'MULTI_TASK',
                        'max_concurrent_runs': 1,
                        'name': 'New_Job_existing_cluster',
                        'tags': {'process': 'extraction',
                                 'project': 'data_analytic

# List all runs in the workspace

In [143]:
# Echo the DATABRICKS_HOST value held in config (a missing key shows None).
config.get("DATABRICKS_HOST")

'https://adb-1286930193882465.5.azuredatabricks.net'

In [144]:
# Pull the workspace address and the access token out of the loaded config.
workspace_url, token = (
    config.get(setting) for setting in ("DATABRICKS_HOST", "DATABRICKS_TOKEN")
)

# Headers sent with the request: bearer-token auth and a JSON content type.
headers = {
    "Authorization": "Bearer {}".format(token),
    "Content-Type": "application/json",
}

In [148]:

# Jobs API 2.1 "runs/list" endpoint.
urllistrun = workspace_url + "/api/2.1/jobs/runs/list"

# Query-string filter; the flag is sent as the literal text "true".
params = dict(completed_only="true")

response = requests.get(urllistrun, params=params, headers=headers)

# HTTP status, then the decoded JSON reply pretty-printed.
print(response.status_code)
pprint.pprint(response.json())

200
{'has_more': False,
 'next_page_token': 'CAEQ66WagP4xINjz9veW_aIB',
 'prev_page_token': 'CAAQle2Al_4xILW7x6Tctyc=',
 'runs': [{'cleanup_duration': 0,
           'creator_user_name': 'olonok@hotmail.com',
           'end_time': 1717498836077,
           'execution_duration': 0,
           'format': 'MULTI_TASK',
           'job_id': 220500977018293,
           'number_in_job': 173438372273589,
           'run_duration': 540120,
           'run_id': 173438372273589,
           'run_name': 'Untitled',
           'run_page_url': 'https://adb-1286930193882465.5.azuredatabricks.net/?o=1286930193882465#job/220500977018293/run/173438372273589',
           'run_type': 'SUBMIT_RUN',
           'setup_duration': 0,
           'start_time': 1717498295957,
           'state': {'life_cycle_state': 'TERMINATED',
                     'result_state': 'SUCCESS',
                     'state_message': '',
                     'user_cancelled_or_timedout': False}},
          {'cleanup_duration': 0,
   

# Cancel a Job Run

In [149]:
# Echo the DATABRICKS_HOST value held in config (a missing key shows None).
config.get("DATABRICKS_HOST")

'https://adb-1286930193882465.5.azuredatabricks.net'

In [160]:
# Pull the workspace address and the access token out of the loaded config.
workspace_url, token = (
    config.get(setting) for setting in ("DATABRICKS_HOST", "DATABRICKS_TOKEN")
)

# Headers sent with the request: bearer-token auth and a JSON content type.
headers = {
    "Authorization": "Bearer {}".format(token),
    "Content-Type": "application/json",
}

In [161]:
# Identify the run to cancel. The id is hard-coded and equals the run_id
# printed by the runs/submit cell earlier in this notebook.
body = dict(run_id=640968121528131)

# Jobs API 2.1 "runs/cancel" endpoint.
urlcancelrun = workspace_url + "/api/2.1/jobs/runs/cancel"
response = requests.post(urlcancelrun, data=json.dumps(body), headers=headers)

# HTTP status followed by the raw reply text.
print(response.status_code, response.text, sep="\n")

200
{}


# Workspace 

In [103]:
# Workspace API 2.0 "list" endpoint: enumerate the objects under a folder.
# Relies on workspace_url and headers defined by an earlier cell.
urllist = workspace_url + "/api/2.0/workspace/list"
params = dict(path="/Users/olonok@hotmail.com/")
response = requests.get(urllist, params=params, headers=headers)

# HTTP status followed by the raw reply text.
print(response.status_code, response.text, sep="\n")

200
{"objects":[{"object_type":"NOTEBOOK","path":"/Users/olonok@hotmail.com/jobs-api","language":"PYTHON","created_at":1717159068335,"modified_at":1717482081431,"object_id":760923413360882,"resource_id":"760923413360882"},{"object_type":"MLFLOW_EXPERIMENT","path":"/Users/olonok@hotmail.com/pipelines","object_id":878341338033203,"resource_id":"878341338033203"},{"object_type":"NOTEBOOK","path":"/Users/olonok@hotmail.com/mlflow-end-to-end","language":"PYTHON","created_at":1717447044835,"modified_at":1717447044888,"object_id":1899686197831447,"resource_id":"1899686197831447"},{"object_type":"MLFLOW_EXPERIMENT","path":"/Users/olonok@hotmail.com/multistep_workflow","object_id":1910808740223562,"resource_id":"1910808740223562"},{"object_type":"NOTEBOOK","path":"/Users/olonok@hotmail.com/MLflow with Optuna: Hyperparameter Optimization and Tracking","language":"PYTHON","created_at":1717344828119,"modified_at":1717395402270,"object_id":2546891085146716,"resource_id":"2546891085146716"},{"object

# Machine Learning

In [104]:
# MLflow REST endpoint "runs/get": fetch one tracked run by its id.
# Relies on workspace_url and headers defined by an earlier cell.
urlrun_id = workspace_url + "/api/2.0/mlflow/runs/get"
params = dict(run_id="0e3c41c688d841f39c83ccd0095d5954")
response = requests.get(urlrun_id, params=params, headers=headers)

# HTTP status followed by the raw reply text.
print(response.status_code, response.text, sep="\n")

200
{"run":{"info":{"run_id":"0e3c41c688d841f39c83ccd0095d5954","run_uuid":"0e3c41c688d841f39c83ccd0095d5954","experiment_id":"2546891085146744","run_name":"charming-hog-782","status":"FINISHED","start_time":1717353627765,"end_time":1717353628754,"artifact_uri":"dbfs:/databricks/mlflow-tracking/2546891085146744/0e3c41c688d841f39c83ccd0095d5954/artifacts","lifecycle_stage":"active"},"data":{"metrics":[{"key":"mse","value":34331.82597229768,"timestamp":1717353628366,"step":0},{"key":"rmse","value":185.28849390153098,"timestamp":1717353628551,"step":0}],"params":[{"key":"alpha","value":"5.0206024979717066e-08"},{"key":"booster","value":"gbtree"},{"key":"eta","value":"0.6943485418169614"},{"key":"eval_metric","value":"rmse"},{"key":"gamma","value":"0.00019463076719002743"},{"key":"grow_policy","value":"depthwise"},{"key":"lambda","value":"1.238357756908523e-06"},{"key":"max_depth","value":"5"},{"key":"objective","value":"reg:squarederror"}],"tags":[{"key":"mlflow.databricks.cluster.id","va

# Create cluster

In [105]:
# Create a single-node cluster through the Clusters API (2.0).
# Relies on workspace_url and headers defined by an earlier cell.
body = {
    "cluster_name": "single-node-cluster",
    "spark_version": "14.3.x-scala2.12",
    "node_type_id": "Standard_DS3_v2",
    # Zero workers together with the singleNode profile requests a
    # single-node cluster.
    "num_workers": 0,
    "spark_conf": {
        "spark.databricks.cluster.profile": "singleNode",
        # The master URL needs the "local" scheme; a bare "[*, 4]" is not a
        # valid Spark master. This now matches the job-run cluster spec used
        # earlier in this notebook.
        "spark.master": "local[*, 4]",
    },
    "custom_tags": {
        "ResourceClass": "SingleNode",
    },
}


urlcreate_cluster = workspace_url + "/api/2.0/clusters/create"
response = requests.post(urlcreate_cluster, headers=headers, data=json.dumps(body))

# Show the HTTP status and the raw reply (the reply carries the cluster_id).
print(response.status_code)
print(response.text)

200
{"cluster_id":"0604-092256-b5lijbdg"}
