In [0]:
# Notebook parameter "job_name" (text widget); its default value is "stg2ds-drone-sim".
# The value is read back further down with dbutils.widgets.get('job_name').
dbutils.widgets.text("job_name","stg2ds-drone-sim")

In [0]:
%run "./dbx_rest_util"

In [0]:
from datetime import datetime

Time-conversion helper functions

In [0]:
# Milliseconds -> seconds
def milis_to_seconds(milis):
    """
    Convert a value expressed in milliseconds to seconds.

    Parameters
    ----------
    milis : int or float
        Value in milliseconds

    Returns
    -------
    float
        ``milis`` divided by 1000 (true division, so the result is fractional)
    """
    seconds = milis / 1000
    return seconds

# Milliseconds since the epoch -> datetime
def milis_to_date(milis):
    """
    Convert an epoch timestamp given in milliseconds to a ``datetime``.

    Parameters
    ----------
    milis : int or float
        Epoch timestamp in milliseconds

    Returns
    -------
    datetime
        Result of ``datetime.fromtimestamp`` called without a timezone,
        i.e. a naive datetime in the local timezone of the machine running the code
    """
    epoch_seconds = milis_to_seconds(milis)
    return datetime.fromtimestamp(epoch_seconds)

Databricks job helper functions

In [0]:
def get_job_by_name(job_name_pattern: str, full_name: bool = False, jobs: list = None) -> list:
    """
    Find the IDs of jobs whose name matches ``job_name_pattern``.

    Parameters
    ----------
    job_name_pattern : str
        Job name (or part of it) to search for
    full_name : bool
        If True, the job name must be equal to ``job_name_pattern``.
        If False, it is enough that the job name contains ``job_name_pattern``.
    jobs : list, optional
        Job descriptions to search, each a dict with a ``job_id`` key and a
        ``settings`` dict holding the ``name``. When omitted, the list is
        fetched with ``list_jobs()``.

    Returns
    -------
    list
        The ``job_id`` of every matching job, in listing order. The list is
        empty when nothing matches. It may hold several IDs even when
        ``full_name`` is True if more than one job carries the same name.
    """
    if jobs is None:
        # Tolerate a response without a 'jobs' entry (e.g. nothing to list)
        # instead of failing with KeyError.
        # NOTE(review): assumes list_jobs() returns the parsed JSON dict of the
        # jobs list endpoint and already handles pagination - confirm.
        jobs = list_jobs().get('jobs') or []

    job_ids = []
    for job in jobs:
        # A job without settings or without a name can never match; skip it
        # rather than failing on `pattern in None`.
        name = (job.get('settings') or {}).get('name')
        if name is None:
            continue
        if full_name:
            matched = name == job_name_pattern
        else:
            matched = job_name_pattern in name
        if matched:
            job_ids.append(job['job_id'])
    return job_ids

In [0]:
def get_task_execution(run: dict) -> list:
    """
    Extract one record per task from the details of a single job run.

    Parameters
    ----------
    run : dict
        run details return by Databricks Rest API
        https://docs.databricks.com/api/azure/workspace/jobs/listruns#runs

    Returns
    -------
    list
        One inner list per entry of ``run['tasks']`` with, in this order:
        run id, task key, execution duration in seconds, start time, end time,
        result state of the run, notebook path, notebook source.
        The result state is taken from the run (not from the task), so it is
        the same for every task of the run; it is None when the run carries no
        ``result_state``. Notebook path and source are None for tasks without
        a ``notebook_task`` entry. The list is empty when the run has no tasks.
    """
    # Run-level outcome, looked up once; missing keys yield None instead of KeyError.
    result_state = (run.get('state') or {}).get('result_state')

    task_data = []
    for task in run.get('tasks') or []:
        # Only notebook tasks carry this section; other task types get None values.
        notebook_task = task.get('notebook_task') or {}
        task_data.append([
            run['run_id'],
            task['task_key'],
            milis_to_seconds(task['execution_duration']),
            milis_to_date(task['start_time']),
            milis_to_date(task['end_time']),
            result_state,
            notebook_task.get('notebook_path'),
            notebook_task.get('source'),
        ])
    return task_data

In [0]:
in_job_name = dbutils.widgets.get('job_name')
# Look the job up by its exact name; fail with a clear message when it does not exist.
job_id = get_job_by_name(in_job_name, True)
if not job_id:
    raise ValueError(f"No job named '{in_job_name}' was found")
job_runs = list_jobs_runs(job_id[0])

# Explicit schema so that columns holding only None values (e.g. notebook_path
# for non-notebook tasks) do not break type inference.
audit_schema = (
    "run_id bigint, task_key string, run_duration_seconds double, "
    "start_time timestamp, end_time timestamp, result_state string, "
    "notebook_path string, notebook_source string"
)

# Collect the task rows of every run that is not currently running, then build
# a single DataFrame instead of one DataFrame per run chained with union().
task_rows = []
for run_summary in (job_runs or {}).get('runs') or []:
    if run_summary['status']['state'] != 'RUNNING':
        # The run listing is only used for the run id; full details
        # (including the tasks) are fetched per run.
        run_details = get_job_run(run_summary['run_id'])
        task_rows.extend(get_task_execution(run_details))

tasks_df = spark.createDataFrame(task_rows, audit_schema) if task_rows else None

if tasks_df is not None:
    tasks_df.write.mode('overwrite').saveAsTable(f"{DB_NAME}.audit_job_run")
else:
    # Nothing to audit: leave the existing table untouched rather than failing on None.
    print(f"No finished runs with tasks found for job '{in_job_name}'; audit table not updated")

In [0]:
%sql
-- For every task run, show the previous run of the same task (ordered by start_time)
-- and how much longer (positive) or shorter (negative) the current run took.
-- NOTE(review): the schema name is hard-coded here, while the Python cell writes to
-- {DB_NAME}.audit_job_run - confirm that DB_NAME is next_level_dm.
select
  jr.run_id
, jr.task_key 
, jr.run_duration_seconds
, jr.start_time
, jr.end_time
, lag(run_id) over(partition by task_key order by start_time) as previous_run_id 
, lag(run_duration_seconds) over(partition by task_key order by start_time) as previous_run_duration_seconds
, jr.run_duration_seconds - previous_run_duration_seconds as performance_degradation_seconds
-- nullif() turns a previous duration of 0 into NULL so the percentage is NULL
-- instead of raising a division-by-zero error when ANSI mode is enabled
, round((performance_degradation_seconds/nullif(previous_run_duration_seconds, 0))*100,2) as performance_degradation_percentage
, jr.notebook_path
, jr.notebook_source 
from next_level_dm.audit_job_run jr
where task_key like '%drone%'
order by start_time desc