In [0]:
%python
import requests
import json
import time
import traceback
from pyspark.sql import SparkSession
from pyspark.sql.functions import current_timestamp
from databricks.sdk import WorkspaceClient

# API configuration
API_URL = "http://10.223.106.19:8000"
MAX_RETRIES = 2
RETRY_DELAY = 2  # seconds

# Initialize Spark session
spark = SparkSession.builder.getOrCreate()

# Initialize Databricks WorkspaceClient
w = WorkspaceClient()

# Retrieve API key from Databricks Secret
try:
    API_KEY = dbutils.secrets.get(scope="dev_secret_scope", key="fast-api-key")
    print("API key retrieved from secrets")
except Exception as e:
    print(f"Error retrieving API key: {e}")
    raise Exception(f"Failed to retrieve API key: {e}")

# Get the workflow name and job ID from the notebook context
try:
    # context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
    # workflow_name = context.tags().get("jobName")
    # print("testing",workflow_name)
    # job_id = context.tags().get("jobId")
    job_id = dbutils.notebook.entry_point.getDbutils().notebook().getContext().jobId().get()
    workflow_name = dbutils.notebook.entry_point.getDbutils().notebook().getContext().jobName().get()
    job_id = str(job_id)
    # job_id = job_id[5:-1]
    if workflow_name:
        workflow_name = str(workflow_name)
        # workflow_name = workflow_name[5:-1]  # Extract the inner value
    script_name = workflow_name
    print(f"Script name: {script_name}, Job ID: {job_id}")
except Exception as e:
    print(f"Error getting workflow context: {e}")
    raise Exception("Failed to retrieve workflow context")

# Fetch job definition using Databricks SDK
def get_job_definition(job_id, script_name):
    try:
        if job_id:
            job = w.jobs.get(job_id=job_id)
            return job.as_dict()
        else:
            print(f"No job ID provided, cannot fetch job definition for {script_name}")
            return {}
    except Exception as e:
        print(f"Error fetching job definition: {e}")
        return {}

# Extract parameters from job definition
def extract_parameters(script_name, job_id):
    parameters = {}
    all_values = []
    
    job_def = get_job_definition(job_id, script_name)
    if not job_def:
        print(f"No job definition found for {script_name}, cannot extract parameters")
        return parameters, all_values

    try:
        if "tasks" in job_def.get("settings", {}):
            for task in job_def["settings"]["tasks"]:
                if "notebook_task" in task and "base_parameters" in task["notebook_task"]:
                    task_params = task["notebook_task"]["base_parameters"]
                    for key, value in task_params.items():
                        parameters[key] = value
                        all_values.append(str(value))
        print(f"Parameters extracted from job definition: {parameters}")
    except Exception as e:
        print(f"Error extracting parameters from job definition: {e}")
        print(traceback.format_exc())
    
    return parameters, all_values

# Combine parameters
parameters, job_values = extract_parameters(script_name, job_id)

all_values = job_values 
print(all_values)
parameters_str = " ".join(str(v) for v in all_values if v)
print(f"Combined parameter values: '{parameters_str}'")
print(f"Running script: {script_name}")

# Check API health
def check_api_health():
    try:
        response = requests.get(
            f"{API_URL}/health",
            headers={"X-API-Key": API_KEY},
            timeout=10
        )
        if response.status_code == 200:
            print("API is accessible!")
            return True
        else:
            error_detail = response.json().get("detail", response.text) if response.content else "No response content"
            print(f"API returned status code: {response.status_code}, Detail: {error_detail}")
            return False
    except Exception as e:
        print(f"Error connecting to API: {e}")
        return False

# Run script with retry logic
def run_script(script_name, parameters_str):
    retries = 0
    while retries < MAX_RETRIES:
        try:
            payload = {"script_name": script_name}
            if parameters_str:
                payload["parameters"] = parameters_str
                
            response = requests.post(
                f"{API_URL}/run-script",
                json=payload,
                headers={"X-API-Key": API_KEY},
                timeout=300
            )
            
            if response.status_code == 200:
                return response.json()
            elif response.status_code == 429:
                retry_after = int(response.headers.get('Retry-After', RETRY_DELAY))
                error_detail = response.json().get("detail", "Rate limit exceeded") if response.content else "No response content"
                print(f"Rate limit exceeded: {error_detail}. Retrying in {retry_after} seconds...")
                time.sleep(retry_after)
                retries += 1
                continue
            else:
                error_detail = response.json().get("detail", response.text) if response.content else "No response content"
                raise Exception(f"API error: Status code {response.status_code}, Detail: {error_detail}")
        except Exception as e:
            print(f"Error running script: {e}")
            retries += 1
            if retries < MAX_RETRIES:
                print(f"Retrying ({retries}/{MAX_RETRIES})...")
                time.sleep(RETRY_DELAY)
            continue
    
    raise Exception(f"Maximum retries ({MAX_RETRIES}) exceeded. Unable to execute script.")

# Prepare data for storage
def prepare_data_for_storage(json_data):
    if not json_data:
        print("No data to store")
        return None
    
    try:
        text_value = json.dumps(json_data)
        data = [(text_value,)]
        df = spark.createDataFrame(data, ["text_value"])
        df = df.withColumn("updated_date", current_timestamp())
        return df
    except Exception as e:
        print(f"Error preparing data for storage: {e}")
        return None

# Main execution
try:
    if not script_name:
        raise Exception("No valid script name provided. Please provide a script_name parameter.")

    if not check_api_health():
        raise Exception("Cannot run script. API is not accessible.")

    print(f"Running script: {script_name} with parameters: {parameters_str}")
    result = run_script(script_name, parameters_str)

    if not result.get("success", False):
        error_msg = result.get("output", "No error message provided by API")
        raise Exception(f"Script execution failed: {error_msg}")

    print(f"\nScript execution successful: {result['success']}")
    if result.get("error"):
        print(f"\nExecution stderr (logged for reference):\n{result['error']}")

    storage_df = prepare_data_for_storage(result)
    if storage_df is not None:
        print("\nData prepared for storage:")
        display(storage_df)
        table_name = f"{script_name.replace('.', '_').replace('-', '_')}"
        storage_df.write.format("delta").mode("append").saveAsTable(table_name)
        print(f"Results saved to table: {table_name}")
    else:
        raise Exception("Failed to prepare data for storage.")
        
except Exception as e:
    print(f"Error in main execution: {e}")
    print(traceback.format_exc())
    raise

API key retrieved from secrets
Script name: None
Combined parameter values: ''
Running script: None
API is accessible!
Running script: None with parameters: 
Error running script: API error: Status code 404, Detail: Script not found: None
Retrying (1/2)...
Error running script: API error: Status code 404, Detail: Script not found: None


[0;31m---------------------------------------------------------------------------[0m
[0;31mException[0m                                 Traceback (most recent call last)
File [0;32m<command-4605066948949457>, line 214[0m
[1;32m    212[0m [38;5;66;03m# Run the script with parameters[39;00m
[1;32m    213[0m [38;5;28mprint[39m([38;5;124mf[39m[38;5;124m"[39m[38;5;124mRunning script: [39m[38;5;132;01m{[39;00mscript_name[38;5;132;01m}[39;00m[38;5;124m with parameters: [39m[38;5;132;01m{[39;00mparameters_str[38;5;132;01m}[39;00m[38;5;124m"[39m)
[0;32m--> 214[0m result [38;5;241m=[39m run_script(script_name, parameters_str)
[1;32m    216[0m [38;5;66;03m# Check if the script execution was successful[39;00m
[1;32m    217[0m [38;5;28;01mif[39;00m [38;5;129;01mnot[39;00m result[38;5;241m.[39mget([38;5;124m"[39m[38;5;124msuccess[39m[38;5;124m"[39m, [38;5;28;01mFalse[39;00m):

File [0;32m<command-4605066948949457>, line 131[0m, in [0;36mrun_