# export_job_definitions_to_repo

This notebook exports the JSON definitions of a Databricks job into your Git-backed workspace repo under `databricks/job_definitions/{job_name}/`. It saves three JSON variants (`get`, `create`, and `reset`) for better change tracking and automation.

This notebook is meant to be run only in the Databricks cloud environment, not on your local computer.

## How to Run This Notebook
- Choose or enter the values for `DRY_RUN` and `job_id` in the widgets above.
- If no widgets are visible, run the first code cell below this one; the widgets will then appear.
- Choose `DRY_RUN = true` to test the functionality without actually saving files to the repo.
- Choose `DRY_RUN = false` to save files for real.
- Click `Run all`

## When to Run This Notebook

- To **back up or version control** your job configurations alongside your code.
- After making **intentional job changes** that you want to persist and commit.

## When NOT to Run This Notebook

- During temporary or experimental job changes you do **not want to commit**.
- Without a valid job ID or when unsure of the job’s purpose.

## How to Find the Job ID

1. In Databricks UI, navigate to **Jobs**.
1. Click on the job you want to export.
1. In the side panel, the job ID is the first item under `Job Details`. (The job ID also appears in the URL: `https://{your workspace id}.azuredatabricks.net/jobs/{job_id}`.)
1. Enter the job ID in the `job_id` parameter widget above.

## Using the `DRY_RUN` Flag

- **Set `DRY_RUN` to `true` (the default; recommended for debugging):**  
  - The notebook fetches the job definition and prints what it *would* do.  
  - No files are written to the repo, and temp files are **not deleted** (allowing inspection).
- **Set `DRY_RUN` to `false` to perform actual export:**  
  - The notebook writes JSON files into the Git-backed repo folder.  
  - Temporary files are cleaned up after a successful run.

Always review outputs before setting `DRY_RUN=false` to avoid unintended overwrites. 

In [0]:
# Initialize widgets

def widget_exists(name):
    """Report whether a notebook widget called ``name`` can be read.

    The check simply tries to read the widget's value: any exception raised
    by the lookup is treated as "the widget is not there".

    Args:
        name: Name of the widget to probe.

    Returns:
        ``True`` when ``dbutils.widgets.get(name)`` succeeds, ``False`` when
        it raises.
    """
    try:
        dbutils.widgets.get(name)
    except Exception:
        return False
    else:
        return True

# Create each widget only when it cannot already be read
# (presumably so re-running this cell keeps values the user has entered).

# Free-text widget for the job ID; starts out empty.
job_id_widget_missing = not widget_exists("job_id")
if job_id_widget_missing:
    dbutils.widgets.text("job_id", "")

# Dropdown for the dry-run flag; "true" is the initial selection.
dry_run_widget_missing = not widget_exists("DRY_RUN")
if dry_run_widget_missing:
    dbutils.widgets.dropdown("DRY_RUN", "true", ["true", "false"])

In [0]:
# Get parameters from widgets

# Read the raw widget values. Surrounding whitespace is stripped so that a
# job ID pasted with a stray space or newline is still accepted.
job_id_str = dbutils.widgets.get("job_id").strip()

# Anything other than "true" (case-insensitive) is treated as a real run.
DRY_RUN = dbutils.widgets.get("DRY_RUN").strip().lower() == "true"

# str.isdecimal() matches exactly the characters int() can parse, whereas
# str.isdigit() also accepts characters such as "²" that int() rejects.
# The empty string fails this check as well.
if not job_id_str.isdecimal():
    raise ValueError(
        f"Invalid job_id provided: {job_id_str!r} (expected digits only)."
    )

JOB_ID = int(job_id_str)

In [0]:
# Imports and config
import os
import json
import shutil
import requests
import copy


# Securely read from secret scope
# The token is sent as the Bearer credential on the Jobs API request.
DATABRICKS_TOKEN = dbutils.secrets.get(scope="spelling-bee-scope", key="databricks-token")

# API URL taken from the current notebook context; used as the prefix of the
# REST endpoint URL (presumably the workspace base URL -- confirm).
DATABRICKS_HOST = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiUrl().get()
print(f"Detected Databricks Host: {DATABRICKS_HOST}")

# Git-backed repo path
# The "user" tag of the notebook context is used to build the per-user
# workspace folder (presumably the login name of whoever runs the notebook).
user_name = dbutils.notebook.entry_point.getDbutils().notebook().getContext().tags().get("user").get()
user_home = f"/Workspace/Users/{user_name}"
print(f"User home path: {user_home}")

# The repo is expected to be checked out directly under the user's home folder
# with this exact name.
REPO_BASE_PATH = f"{user_home}/spelling-bee-solver-training"
print(f"Repo base path: {REPO_BASE_PATH}")

# Folder inside the repo that receives one sub-folder per exported job.
JOB_DEF_PATH = f"{REPO_BASE_PATH}/databricks/job_definitions"

# Local temp directory
# JSON files are written here first and then copied into the repo.
LOCAL_TMP_DIR = "/tmp/job_exports"

In [0]:
# Utility functions

# Top-level keys removed from a fetched job definition before it is saved.
METADATA_FIELDS = ["creator_user_name", "run_as_user_name", "run_as_owner"]


def strip_metadata_fields(job_json):
    """Delete every key listed in ``METADATA_FIELDS`` from ``job_json``.

    The mapping is modified in place; keys that are absent are skipped.

    Args:
        job_json: Mapping holding a job definition.

    Returns:
        The same ``job_json`` object that was passed in.
    """
    for key in METADATA_FIELDS:
        if key in job_json:
            del job_json[key]
    return job_json

def get_job_definition(job_id, *, timeout=30):
    """Fetch a job definition from the Jobs API ``/api/2.1/jobs/get`` endpoint.

    Args:
        job_id: ID of the job to fetch; sent as the ``job_id`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            host.

    Returns:
        The decoded JSON response with the keys listed in ``METADATA_FIELDS``
        removed.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.get(
        f"{DATABRICKS_HOST}/api/2.1/jobs/get",
        headers={"Authorization": f"Bearer {DATABRICKS_TOKEN}"},
        # Let requests build and encode the query string.
        params={"job_id": job_id},
        timeout=timeout,
    )
    response.raise_for_status()
    return strip_metadata_fields(response.json())

def convert_for_create(job_json):
    """Build the ``create`` variant of a job definition.

    Takes an independent deep copy of ``job_json["settings"]`` and drops the
    ``format`` and ``run_as`` keys from it (``run_as`` carries the
    user-specific execution context). The input is left untouched.

    Args:
        job_json: Job definition containing a ``"settings"`` entry.

    Returns:
        A new settings dict without ``format`` and ``run_as``.

    Raises:
        KeyError: If ``job_json`` has no ``"settings"`` key.
    """
    settings = copy.deepcopy(job_json["settings"])
    for unwanted_key in ("format", "run_as"):
        settings.pop(unwanted_key, None)
    return settings

def convert_for_reset(job_json):
    """Build the ``reset`` variant of a job definition.

    Args:
        job_json: Job definition containing ``"job_id"`` and ``"settings"``.

    Returns:
        A dict with the job's ``job_id`` and, under ``new_settings``, the
        result of ``convert_for_create(job_json)``.

    Raises:
        KeyError: If ``"job_id"`` (checked first) or ``"settings"`` is missing.
    """
    job_id = job_json["job_id"]
    new_settings = convert_for_create(job_json)
    return {"job_id": job_id, "new_settings": new_settings}

def save_json_locally(obj, path):
    """Serialize ``obj`` as 2-space-indented JSON into the file at ``path``.

    Missing parent directories are created first. An existing file at
    ``path`` is overwritten.

    Args:
        obj: JSON-serializable object to write.
        path: Destination file path; may be a bare filename.

    Raises:
        TypeError: If ``obj`` is not JSON-serializable.
        OSError: If the directory or file cannot be created.
    """
    parent_dir = os.path.dirname(path)
    # dirname() is "" for a bare filename and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # Fix the encoding so the output does not depend on the platform locale.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(obj, f, indent=2)

def copy_to_repo(local_path, repo_path):
    """Copy a local file to ``repo_path``, replacing any existing file.

    When the global ``DRY_RUN`` flag is set, nothing is copied and a message
    describing the skipped copy is printed instead.

    Args:
        local_path: Path of the file to copy.
        repo_path: Destination path inside the repo.
    """
    if not DRY_RUN:
        source_uri = f"file:{local_path}"
        target_uri = f"file:{repo_path}"

        # Remove the destination first so the copy can overwrite it. A failure
        # here is ignored on purpose: the file may simply not exist yet.
        try:
            dbutils.fs.rm(target_uri)
        except Exception:
            pass

        dbutils.fs.cp(source_uri, target_uri)
    else:
        print(f"DRY RUN: Would have copied {local_path} to {repo_path}")

In [0]:
# Main logic

# Set to True only after every step below has completed; the later cells read
# this flag to decide on cleanup and on dumping the temp files.
success = False

try:
    job_json = get_job_definition(JOB_ID)
    job_name = job_json["settings"]["name"]

    # File paths
    # NOTE(review): job_name is used verbatim as a folder and file name; a
    # name containing a path separator produces nested paths -- confirm that
    # this is acceptable.
    job_dir = os.path.join(JOB_DEF_PATH, job_name)
    # A dry run must leave the repo untouched, so the target folder is only
    # created for a real export.
    if DRY_RUN:
        print(f"DRY RUN: Would have created directory {job_dir}")
    else:
        os.makedirs(job_dir, exist_ok=True)

    local_get = os.path.join(LOCAL_TMP_DIR, f"{job_name}_get.json")
    local_create = os.path.join(LOCAL_TMP_DIR, f"{job_name}_create.json")
    local_reset = os.path.join(LOCAL_TMP_DIR, f"{job_name}_reset.json")

    repo_get = os.path.join(job_dir, f"{job_name}_get.json")
    repo_create = os.path.join(job_dir, f"{job_name}_create.json")
    repo_reset = os.path.join(job_dir, f"{job_name}_reset.json")

    # Save all variants (always written to the local temp directory, even in
    # a dry run, so they can be inspected)
    save_json_locally(job_json, local_get)
    save_json_locally(convert_for_create(job_json), local_create)
    save_json_locally(convert_for_reset(job_json), local_reset)

    # Copy to workspace repo (copy_to_repo only prints in a dry run)
    copy_to_repo(local_get, repo_get)
    copy_to_repo(local_create, repo_create)
    copy_to_repo(local_reset, repo_reset)

    if not DRY_RUN:
        print(f"✅ Exported job '{job_name}' to workspace repo at {job_dir}")
    success = True

except Exception as e:
    # The error is reported rather than re-raised so the following cells
    # still run and can show whatever temp files were produced.
    print(f"❌ Error exporting job: {e}")

In [0]:
# Temp files are removed only after a successful, real (non-dry-run) export;
# in every other case they are kept.
if not success:
    print(f"⚠️ Temp files left in {LOCAL_TMP_DIR} for inspection.")
elif DRY_RUN:
    print("🧪 DRY_RUN enabled — skipping temp file cleanup.")
else:
    # ignore_errors=True: a failed cleanup does not raise.
    shutil.rmtree(LOCAL_TMP_DIR, ignore_errors=True)
    print("✅ Cleaned up temporary files.")

In [0]:
# After a dry run or a failed export, print the contents of the temp files so
# they can be reviewed in the notebook output.
if DRY_RUN or not success:
    print("⚠️ Here are the temp files that would have been created:")
    # The export cell can fail before the path variables are assigned (for
    # example when the API call itself fails) or before the files are written,
    # so look the paths up defensively instead of raising a second error here.
    for path_var in ("local_get", "local_create", "local_reset"):
        file_path = globals().get(path_var)
        if file_path is None:
            print(f"{path_var}: path was never set; nothing to show.")
            continue
        if not os.path.isfile(file_path):
            print(f"File path: {file_path} (file was not written)")
            continue
        print(f"File path: {file_path}")
        with open(file_path, "r", encoding="utf-8") as f:
            print(f.read())
        print("=====end of file======\n")