In [0]:
from concurrent.futures import ThreadPoolExecutor, Future
from dataclasses import dataclass
from typing import List
import json
 
# used to carry notebook data
@dataclass
class Notebook:
  """Describes one notebook to run: where it lives and how to run it."""
  path: str                # notebook path handed to the runner
  timeout: int             # timeout value handed to the runner
  parameters: dict = None  # optional parameters; None is treated as "no parameters"
  retry: int = 0           # number of retries still allowed after a failure
  enabled: bool = True     # disabled notebooks are skipped by executeNotebooks

  def getParameters(self):
    """Return the notebook parameters plus a "notebook" entry holding the path.

    A new dict is built on every call so that a parameters dict supplied by
    the caller (possibly shared between several Notebook instances that run
    on different threads) is never modified.
    """
    params = dict(self.parameters) if self.parameters else {}
    params["notebook"] = self.path
    return params
 
# execute a notebook using databricks workflows
def executeNotebook(notebook:Notebook):
   
  print(f"Executing notebook {notebook.path}")
   
  try:
     
    return dbutils.notebook.run(notebook.path, notebook.timeout, notebook.getParameters())
   
  except Exception as e:
     
    if notebook.retry < 1:
      failed = json.dumps({
          "status" : "failed",
          "error" : str(e),
          "notebook" : notebook.path})
      raise Exception(failed)
     
    print(f"Retrying notebook {notebook.path}")
    notebook.retry -= 1
   
   
def tryFuture(future:Future):
  """Resolve a future into a parsed result without ever raising.

  The future's result is parsed as JSON. If resolving or parsing fails, the
  exception message is parsed as JSON instead (failures raised by the
  notebook executor carry a JSON payload). If that message is not JSON
  either, a failed-status dict wrapping the raw message is returned so one
  unexpected error cannot abort the collection of the remaining results.
  """
  try:
    return json.loads(future.result())
  except Exception as e:
    message = str(e)
    try:
      return json.loads(message)
    except ValueError:
      # json.JSONDecodeError is a ValueError: the message was not a packaged result
      return {"status": "failed", "error": message}
   
   
# Parallel execute a list of notebooks
def executeNotebooks(notebooks:List[Notebook], maxParallel:int):
  """Run every enabled notebook on a thread pool and collect the outcomes.

  Args:
    notebooks: the notebooks to consider; entries whose enabled flag is
      falsy are not submitted.
    maxParallel: passed to the thread pool as max_workers.

  Returns:
    A list with one tryFuture(...) value per submitted notebook, in
    submission order.
  """
  print(f"Executing {len(notebooks)} notebooks with a maxParallel of {maxParallel}")

  with ThreadPoolExecutor(max_workers=maxParallel) as pool:

    # queue up one task per enabled notebook
    pending = []
    for nb in notebooks:
      if nb.enabled:
        pending.append(pool.submit(executeNotebook, nb))

    # Notebooks are expected to report their own errors as a packaged result,
    # but the run call itself can still fail, or a notebook may have let an
    # error escape; every future is therefore resolved through tryFuture.
    outcomes = []
    for task in pending:
      outcomes.append(tryFuture(task))
    return outcomes
 
   
# describe the notebooks to run: each gets a one hour timeout, no retries,
# and its own "waittimeout" parameter
notebooks = [
  Notebook(path=path, timeout=3600, parameters={"waittimeout": wait}, retry=0, enabled=True)
  for path, wait in [
    ("./py1", 15),
    ("./py2", 10),
    ("./py3", 8),
    ("./py4", 6),
    ("./py5", 18),
    ("./py6", 3),
  ]
]

# run them with at most 3 notebooks in flight at a time
results = executeNotebooks(notebooks, maxParallel=3)

# display the collected outcomes
print(results)

Executing 6 notebooks with a maxParallel of 3
Executing notebook ./py1
Executing notebook ./py2
Executing notebook ./py3
Executing notebook ./py4
Executing notebook ./py5
Executing notebook ./py6
[{'status': 'succeeded', 'notebook': './py1'}, {'status': 'succeeded', 'notebook': './py2'}, {'status': 'succeeded', 'notebook': './py3'}, {'status': 'succeeded', 'notebook': './py4'}, {'status': 'succeeded', 'notebook': './py5'}, {'status': 'succeeded', 'notebook': './py6'}]
