In [None]:
import json
import time
from kubernetes import client, config

# Configure the Kubernetes client library via config.load_kube_config()
config.load_kube_config()

# API clients: BatchV1Api is used below to create Jobs; CoreV1Api is kept
# available as `v1`
batch_v1 = client.BatchV1Api()
v1 = client.CoreV1Api()

# Function to create a Kubernetes job manifest
def create_job_manifest(name, cpu, memory, command):
    """Build a ``batch/v1`` Job manifest that runs *command* in a busybox container.

    The Job and its single container share one name. Kubernetes requires
    container names to be RFC 1123 labels (lowercase alphanumerics and ``-``,
    at most 63 characters, starting and ending with an alphanumeric), so
    *name* is normalized to that form first. Names that are already valid
    pass through unchanged; raw trace ids containing underscores or
    uppercase letters are rewritten instead of being rejected by the API.

    Args:
        name: Desired job name, e.g. a raw trace job id.
        cpu: CPU request, placed verbatim in ``resources.requests.cpu``.
        memory: Memory request, placed verbatim in ``resources.requests.memory``.
        command: Shell command line, executed as ``sh -c <command>``.

    Returns:
        A dict describing the Job, suitable as the ``body`` of
        ``BatchV1Api.create_namespaced_job``.

    Raises:
        ValueError: If *name* contains no character usable in a Kubernetes name.
    """
    import re  # local import keeps this block self-contained

    # Collapse every run of disallowed characters into a single dash, cap the
    # length at 63, then drop dashes left dangling at either end.
    safe_name = re.sub(r"[^a-z0-9-]+", "-", str(name).lower())[:63].strip("-")
    if not safe_name:
        raise ValueError(f"Cannot derive a valid Kubernetes name from {name!r}")

    return {
        "apiVersion": "batch/v1",
        "kind": "Job",
        "metadata": {"name": safe_name},
        "spec": {
            "template": {
                "spec": {
                    "containers": [
                        {
                            "name": safe_name,
                            "image": "busybox",  # Replace with your desired image
                            "command": ["sh", "-c", command],
                            "resources": {"requests": {"cpu": cpu, "memory": memory}},
                        }
                    ],
                    # Never restart the container in place once it exits.
                    "restartPolicy": "Never",
                }
            }
        },
    }

# Read and process the trace file
trace_file = "filtered_cluster_job_log.json"
with open(trace_file, "r", encoding="utf-8") as f:
    trace_data = json.load(f)

# Replay each trace job as a Kubernetes Job in the "default" namespace.
# Jobs without attempts or resource details are skipped with a message.
for job in trace_data:
    job_id = job["jobid"]
    user = job["user"]
    attempts = job.get("attempts", [])

    # Ensure at least one valid attempt exists
    if not attempts:
        print(f"Skipping job {job_id}: No valid scheduling attempts found.")
        continue

    # Process the first attempt (or customize for your use case)
    attempt = attempts[0]
    start_time = attempt.get("start_time")
    end_time = attempt.get("end_time")  # read but not used below
    resources = attempt.get("detail", [])

    if not resources:
        print(f"Skipping job {job_id}: No resource details found.")
        continue

    # Only the first server entry of the attempt is inspected.
    server_info = resources[0]
    # "ip" is used for logging only, so a missing key must not abort the replay.
    ip = server_info.get("ip", "unknown")
    gpus = server_info.get("gpus", [])
    cpu_request = "1"  # Placeholder for CPU (customize as needed)
    memory_request = "1Gi"  # Placeholder for memory (customize as needed)

    # Log the job details
    print(f"Scheduling job {job_id} from user {user} on server {ip} using GPUs: {gpus}")

    # Create and deploy the Kubernetes job
    command = f"echo 'Processing job {job_id}'"
    job_manifest = create_job_manifest(job_id, cpu_request, memory_request, command)
    try:
        batch_v1.create_namespaced_job(namespace="default", body=job_manifest)
    except client.rest.ApiException as exc:
        # One rejected job (e.g. it already exists from an earlier run) should
        # not stop the remaining jobs from being replayed.
        print(f"Failed to create job {job_id}: {exc.status} {exc.reason}")
        continue

    # Fixed pause after each submitted job that has a recorded start time;
    # the timestamp itself does not influence the length of the pause.
    if start_time:
        print(f"Job {job_id} scheduled with start time: {start_time}")
        time.sleep(2)  # Adjust delay as per your simulation needs


In [None]:
import json
import time
from kubernetes import client, config

# Configure the Kubernetes client library via config.load_kube_config()
config.load_kube_config()

# Clients for the batch (Job) API and the core API
batch_v1 = client.BatchV1Api()
v1 = client.CoreV1Api()

# Read the whole trace file and parse it as JSON
with open("filtered_cluster_job_log.json", "r") as f:
    trace = json.loads(f.read())

# Helper function to parse attempts and find the first valid one
def get_first_valid_attempt(attempts):
    """Return the first attempt whose "start_time" and "end_time" are both truthy.

    Attempts are examined in order; ``None`` is returned when no attempt
    qualifies (including when *attempts* is empty).
    """
    complete_attempts = (
        candidate
        for candidate in attempts
        if candidate.get("start_time") and candidate.get("end_time")
    )
    return next(complete_attempts, None)

# Function to create a job manifest from trace details
def create_job_manifest(name, cpu, memory, command):
    """Assemble a ``batch/v1`` Job manifest with a single busybox container.

    *name* is used for both the Job and its container, *cpu* and *memory*
    become the container's resource requests, and *command* is run through
    ``sh -c``. The manifest is returned as a plain dict.
    """
    container = {
        "name": name,
        "image": "busybox",  # Replace with your desired image
        "command": ["sh", "-c", command],
        "resources": {"requests": {"cpu": cpu, "memory": memory}},
    }
    pod_spec = {"containers": [container], "restartPolicy": "Never"}
    return {
        "apiVersion": "batch/v1",
        "kind": "Job",
        "metadata": {"name": name},
        "spec": {"template": {"spec": pod_spec}},
    }

# Process each job in the trace and deploy workloads
for job in trace:
    # Only jobs whose status is exactly "Pass" are replayed; a record with no
    # status at all is skipped rather than raising KeyError.
    if job.get("status") != "Pass":
        continue

    job_name = job["jobid"].replace("application_", "").replace("_", "-")
    submitted_time = job.get("submitted_time")  # read but not used below
    attempts = job.get("attempts") or []  # tolerate a missing or null list

    # Find the first valid attempt
    first_attempt = get_first_valid_attempt(attempts)
    if not first_attempt:
        print(f"Skipping job {job_name} due to missing valid attempts.")
        continue

    # Extract scheduling information. Only the first "detail" entry is
    # inspected; a missing or null "detail"/"gpus" counts as zero GPUs.
    start_time = first_attempt.get("start_time")  # read but not used below
    detail = first_attempt.get("detail") or []
    gpus = (detail[0].get("gpus") or []) if detail else []
    gpu_count = len(gpus)

    # Simulate workload deployment with some assumptions for resources
    cpu_request = f"{gpu_count * 500}m"  # Assume 500m CPU per GPU
    memory_request = f"{gpu_count * 512}Mi"  # Assume 512Mi memory per GPU
    command = f"echo 'Job {job_name} running with {gpu_count} GPUs'"

    # Fixed pause before every deployment; the trace timestamps are not
    # used to compute it.
    delay = 5  # Default delay in seconds
    print(f"Waiting {delay} seconds before deploying job {job_name}...")
    time.sleep(delay)

    # Create and deploy the job
    job_manifest = create_job_manifest(job_name, cpu_request, memory_request, command)
    try:
        batch_v1.create_namespaced_job(namespace="default", body=job_manifest)
    except client.rest.ApiException as exc:
        # One rejected job (e.g. it already exists from an earlier run) should
        # not stop the remaining jobs from being replayed.
        print(f"Failed to deploy job {job_name}: {exc.status} {exc.reason}")
        continue
    print(f"Deployed job {job_name} with {gpu_count} GPUs.")
