In [10]:
import asyncio
import multiprocessing as mp
import threading
import time
import uuid
import heapq
from enum import Enum
from typing import Dict, List, Set, Any, Optional, Callable
from dataclasses import dataclass, field
import logging
from datetime import datetime
import pickle
import types

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('TaskScheduler')

In [11]:
class SerializableTask:
    """A serializable representation of a task for inter-process communication"""
    def __init__(self, task_id: str, func_name: str, func_module: str, 
                 func_code: str, args: tuple, kwargs: dict, timeout: Optional[float]):
        self.task_id = task_id
        self.func_name = func_name
        self.func_module = func_module
        self.func_code = func_code
        self.args = args
        self.kwargs = kwargs
        self.timeout = timeout

    def execute(self):
        """Execute the task by reconstructing the function"""
        try:
            # For simple cases, try to get the function from the module
            if self.func_module and self.func_name:
                module = __import__(self.func_module, fromlist=[self.func_name])
                func = getattr(module, self.func_name)
            else:
                # Fallback: use exec (be careful with this in production)
                local_scope = {}
                exec(self.func_code, globals(), local_scope)
                func = local_scope.get(self.func_name)
            
            if not func:
                raise ValueError(f"Function {self.func_name} not found")
            
            # Execute the function
            result = func(*self.args, **self.kwargs)
            return result
            
        except Exception as e:
            raise Exception(f"Failed to execute task {self.task_id}: {str(e)}")

In [12]:
class Task:
    def __init__(self, task_id: str, func: Callable, args: tuple = (), kwargs: dict = None,
                 priority: TaskPriority = TaskPriority.NORMAL, dependencies: List[str] = None,
                 timeout: Optional[float] = None, retries: int = 0):
        self.task_id = task_id
        self.func = func
        self.args = args
        self.kwargs = kwargs or {}
        self.priority = priority
        self.dependencies = dependencies or []
        self.timeout = timeout
        self.retries = retries
        self.retries_left = retries
        self.status = TaskStatus.PENDING
        self.result = None
        self.error = None
        self.created_at = time.time()
        self.started_at = None
        self.completed_at = None
        self.worker_id = None

    def to_serializable(self) -> SerializableTask:
        """Convert to serializable format for inter-process communication"""
        func_name = self.func.__name__
        func_module = self.func.__module__
        
        # Get function source code as fallback
        try:
            func_code = pickle.dumps(self.func)  # Try to pickle the function
        except:
            func_code = f"# Function {func_name} from {func_module}"
        
        return SerializableTask(
            task_id=self.task_id,
            func_name=func_name,
            func_module=func_module,
            func_code=func_code,
            args=self.args,
            kwargs=self.kwargs,
            timeout=self.timeout
        )

In [13]:
class Worker:
    def __init__(self, worker_id: str, task_queue: mp.Queue, result_queue: mp.Queue):
        self.worker_id = worker_id
        self.task_queue = task_queue
        self.result_queue = result_queue
        self.current_task = None
        self.is_running = True

    def run(self):
        """Worker process main loop"""
        logger.info(f"Worker {self.worker_id} started")
        
        while self.is_running:
            try:
                # Get task from queue with timeout to allow graceful shutdown
                task_data = self.task_queue.get(timeout=1.0)
                if task_data is None:  # Shutdown signal
                    break
                    
                serializable_task = task_data
                self.current_task = serializable_task.task_id
                
                logger.info(f"Worker {self.worker_id} executing task {self.current_task}")
                
                # Execute task with timeout
                try:
                    if serializable_task.timeout:
                        result = self._execute_with_timeout(serializable_task, serializable_task.timeout)
                    else:
                        result = serializable_task.execute()
                    
                    self.result_queue.put((serializable_task.task_id, result, None, self.worker_id))
                    logger.info(f"Worker {self.worker_id} completed task {self.current_task}")
                    
                except asyncio.TimeoutError:
                    self.result_queue.put((serializable_task.task_id, None, "timeout", self.worker_id))
                    logger.warning(f"Worker {self.worker_id} timeout on task {self.current_task}")
                except Exception as e:
                    self.result_queue.put((serializable_task.task_id, None, str(e), self.worker_id))
                    logger.error(f"Worker {self.worker_id} failed task {self.current_task}: {str(e)}")
                    
            except:
                continue  # Timeout for graceful shutdown
                
        logger.info(f"Worker {self.worker_id} stopped")

    def _execute_with_timeout(self, task: SerializableTask, timeout: float):
        """Execute function with timeout"""
        import concurrent.futures
        
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
            future = executor.submit(task.execute)
            try:
                return future.result(timeout=timeout)
            except concurrent.futures.TimeoutError:
                raise asyncio.TimeoutError(f"Task timeout after {timeout} seconds")

    def stop(self):
        self.is_running = False

In [14]:
def long_running_task(duration: float, task_id: str):
    """Example task function that runs for a specified duration"""
    print(f"Task {task_id} running for {duration} seconds")
    time.sleep(duration)
    return f"Task {task_id} completed after {duration}s"

def failing_task():
    """Example task that always fails"""
    raise ValueError("This task is designed to fail")

def computation_task(x: int, y: int):
    """Example computation task"""
    return x * y

def data_processing_task(data: list):
    """Example data processing task"""
    return sum(data) / len(data) if data else 0

In [17]:
class DistributedTaskScheduler:
    def __init__(self, num_workers: int = 4, max_retries: int = 3):
        self.num_workers = num_workers
        self.max_retries = max_retries
        
        # Task management
        self.tasks: Dict[str, Task] = {}
        self.pending_tasks: List[PrioritizedTask] = []
        self.running_tasks: Dict[str, Task] = {}
        self.completed_tasks: Dict[str, Task] = {}
        self.failed_tasks: Dict[str, Task] = {}
        
        # Dependency tracking
        self.task_dependencies: Dict[str, Set[str]] = {}
        self.dependents: Dict[str, Set[str]] = {}
        
        # Worker management
        self.workers: Dict[str, Worker] = {}
        self.worker_processes: Dict[str, mp.Process] = {}
        
        # Queues
        self.task_queue = mp.Queue()
        self.result_queue = mp.Queue()
        
        # Synchronization
        self.lock = threading.Lock()
        self.scheduler_thread = None
        self.is_running = False
        
        # Monitoring
        self.metrics = {
            'tasks_completed': 0,
            'tasks_failed': 0,
            'tasks_cancelled': 0,
            'tasks_timed_out': 0,
            'workers_created': 0,
            'workers_failed': 0
        }
        
        # Start workers
        self._start_workers()

    def _start_workers(self):
        """Initialize and start worker processes"""
        for i in range(self.num_workers):
            self._add_worker()

    def _add_worker(self):
        """Add a new worker process"""
        worker_id = f"worker-{uuid.uuid4().hex[:8]}"
        worker = Worker(worker_id, self.task_queue, self.result_queue)
        process = mp.Process(target=worker.run)
        
        self.workers[worker_id] = worker
        self.worker_processes[worker_id] = process
        process.start()
        
        self.metrics['workers_created'] += 1
        logger.info(f"Started worker {worker_id}")

    def submit_task(self, func: Callable, args: tuple = (), kwargs: dict = None,
                   priority: TaskPriority = TaskPriority.NORMAL, 
                   dependencies: List[str] = None, timeout: Optional[float] = None,
                   retries: int = 0) -> str:
        """Submit a new task to the scheduler"""
        # Validate that the function can be serialized
        try:
            # Test if function can be pickled
            pickle.dumps(func)
        except (AttributeError, pickle.PicklingError) as e:
            raise ValueError(f"Function {func.__name__} cannot be serialized. "
                           f"Use module-level functions instead of local functions or lambdas. Error: {e}")
        
        task_id = f"task-{uuid.uuid4().hex[:8]}"
        task = Task(task_id, func, args, kwargs, priority, dependencies, timeout, retries)
        
        with self.lock:
            self.tasks[task_id] = task
            
            # Set up dependency tracking
            if dependencies:
                self.task_dependencies[task_id] = set(dependencies)
                for dep_id in dependencies:
                    if dep_id not in self.dependents:
                        self.dependents[dep_id] = set()
                    self.dependents[dep_id].add(task_id)
            
            # Add to pending queue if no dependencies
            if not dependencies or all(dep_id in self.completed_tasks for dep_id in dependencies):
                heapq.heappush(self.pending_tasks, 
                             PrioritizedTask(priority.value, task.created_at, task))
                logger.info(f"Submitted task {task_id} with priority {priority.name}")
            else:
                logger.info(f"Submitted task {task_id} waiting for dependencies")
        
        return task_id

    def start(self):
        """Start the task scheduler"""
        if self.is_running:
            return
            
        self.is_running = True
        self.scheduler_thread = threading.Thread(target=self._scheduler_loop)
        self.scheduler_thread.start()
        logger.info("Task scheduler started")

    def stop(self):
        """Stop the task scheduler and all workers"""
        self.is_running = False
        
        # Stop workers
        for worker_id, process in self.worker_processes.items():
            self.task_queue.put(None)  # Send shutdown signal
            process.join(timeout=5.0)
            if process.is_alive():
                process.terminate()
        
        if self.scheduler_thread:
            self.scheduler_thread.join()
            
        logger.info("Task scheduler stopped")

    def _scheduler_loop(self):
        """Main scheduler loop"""
        while self.is_running:
            try:
                # Process completed tasks
                self._process_results()
                
                # Schedule pending tasks
                self._schedule_tasks()
                
                # Handle worker failures
                self._check_worker_health()
                
                # Small sleep to prevent busy waiting
                time.sleep(0.1)
                
            except Exception as e:
                logger.error(f"Scheduler error: {str(e)}")
                time.sleep(1)  # Backoff on error

    def _process_results(self):
        """Process results from workers"""
        while not self.result_queue.empty():
            try:
                task_id, result, error, worker_id = self.result_queue.get_nowait()
                
                with self.lock:
                    if task_id not in self.tasks:
                        logger.warning(f"Received result for unknown task {task_id}")
                        continue
                    
                    task = self.tasks[task_id]
                    task.worker_id = worker_id
                    
                    if error == "timeout":
                        task.status = TaskStatus.TIMEOUT
                        task.error = "Task execution timed out"
                        self.failed_tasks[task_id] = task
                        self.metrics['tasks_timed_out'] += 1
                        logger.warning(f"Task {task_id} timed out")
                        
                    elif error:
                        if task.retries_left > 0:
                            # Retry the task
                            task.retries_left -= 1
                            task.status = TaskStatus.PENDING
                            heapq.heappush(self.pending_tasks, 
                                         PrioritizedTask(task.priority.value, 
                                                       task.created_at, task))
                            logger.info(f"Retrying task {task_id}, {task.retries_left} retries left")
                        else:
                            task.status = TaskStatus.FAILED
                            task.error = error
                            self.failed_tasks[task_id] = task
                            self.metrics['tasks_failed'] += 1
                            logger.error(f"Task {task_id} failed: {error}")
                    else:
                        task.status = TaskStatus.COMPLETED
                        task.result = result
                        task.completed_at = time.time()
                        self.completed_tasks[task_id] = task
                        self.metrics['tasks_completed'] += 1
                        logger.info(f"Task {task_id} completed successfully")
                    
                    # Remove from running tasks
                    if task_id in self.running_tasks:
                        del self.running_tasks[task_id]
                    
                    # Check dependents
                    self._check_dependents(task_id)
                    
            except:
                break

    def _schedule_tasks(self):
        """Schedule tasks to available workers"""
        with self.lock:
            available_workers = len(self.workers) - len(self.running_tasks)
            
            while available_workers > 0 and self.pending_tasks:
                prioritized_task = heapq.heappop(self.pending_tasks)
                task = prioritized_task.task
                
                # Double-check dependencies
                if task.dependencies and not all(dep_id in self.completed_tasks 
                                               for dep_id in task.dependencies):
                    # Still waiting for dependencies, put back
                    heapq.heappush(self.pending_tasks, prioritized_task)
                    continue
                
                # Convert to serializable format and send to worker
                serializable_task = task.to_serializable()
                self.task_queue.put(serializable_task)
                
                task.status = TaskStatus.RUNNING
                task.started_at = time.time()
                self.running_tasks[task.task_id] = task
                
                available_workers -= 1
                logger.info(f"Scheduled task {task.task_id} to worker")

    def _check_dependents(self, completed_task_id: str):
        """Check if any dependent tasks can now be scheduled"""
        if completed_task_id not in self.dependents:
            return
            
        for dependent_id in self.dependents[completed_task_id]:
            if dependent_id not in self.tasks:
                continue
                
            dependent_task = self.tasks[dependent_id]
            
            # Check if all dependencies are now satisfied
            if all(dep_id in self.completed_tasks 
                   for dep_id in dependent_task.dependencies):
                # Task is ready to be scheduled
                heapq.heappush(self.pending_tasks, 
                             PrioritizedTask(dependent_task.priority.value,
                                           dependent_task.created_at, dependent_task))
                logger.info(f"Task {dependent_id} dependencies satisfied, queued for execution")

    def _check_worker_health(self):
        """Check and handle worker failures"""
        dead_workers = []
        
        for worker_id, process in self.worker_processes.items():
            if not process.is_alive():
                dead_workers.append(worker_id)
                logger.warning(f"Worker {worker_id} died")
        
        for worker_id in dead_workers:
            # Reschedule tasks from dead worker
            self._reschedule_worker_tasks(worker_id)
            
            # Remove dead worker
            del self.workers[worker_id]
            del self.worker_processes[worker_id]
            self.metrics['workers_failed'] += 1
            
            # Replace worker
            self._add_worker()

    def _reschedule_worker_tasks(self, worker_id: str):
        """Reschedule tasks that were running on a failed worker"""
        with self.lock:
            tasks_to_reschedule = []
            
            for task_id, task in list(self.running_tasks.items()):
                if task.worker_id == worker_id:
                    tasks_to_reschedule.append(task)
                    del self.running_tasks[task_id]
                    
                    # Reset task status
                    task.status = TaskStatus.PENDING
                    task.started_at = None
                    task.worker_id = None
            
            # Add tasks back to pending queue
            for task in tasks_to_reschedule:
                heapq.heappush(self.pending_tasks, 
                             PrioritizedTask(task.priority.value, task.created_at, task))
                logger.info(f"Rescheduled task {task.task_id} from failed worker {worker_id}")

    def cancel_task(self, task_id: str) -> bool:
        """Cancel a pending or running task"""
        with self.lock:
            if task_id in self.tasks:
                task = self.tasks[task_id]
                
                if task.status == TaskStatus.PENDING:
                    # Remove from pending queue
                    new_pending = [pt for pt in self.pending_tasks if pt.task.task_id != task_id]
                    self.pending_tasks.clear()
                    for pt in new_pending:
                        heapq.heappush(self.pending_tasks, pt)
                    
                elif task.status == TaskStatus.RUNNING:
                    # Can't cancel running tasks directly, mark for cleanup
                    pass
                
                task.status = TaskStatus.CANCELLED
                self.metrics['tasks_cancelled'] += 1
                logger.info(f"Cancelled task {task_id}")
                return True
            
            return False

    def scale_workers(self, new_count: int):
        """Dynamically scale the number of workers"""
        with self.lock:
            current_count = len(self.workers)
            
            if new_count > current_count:
                # Add workers
                for _ in range(new_count - current_count):
                    self._add_worker()
                logger.info(f"Scaled up to {new_count} workers")
                
            elif new_count < current_count:
                # Remove workers (gracefully)
                workers_to_remove = current_count - new_count
                removed = 0
                
                for worker_id in list(self.workers.keys()):
                    if removed >= workers_to_remove:
                        break
                    
                    # Send shutdown signal
                    self.task_queue.put(None)
                    removed += 1
                    
                logger.info(f"Scaled down to {new_count} workers")

    def get_task_status(self, task_id: str) -> Optional[Dict[str, Any]]:
        """Get status of a specific task"""
        with self.lock:
            if task_id in self.tasks:
                task = self.tasks[task_id]
                return {
                    'task_id': task.task_id,
                    'status': task.status.value,
                    'priority': task.priority.name,
                    'created_at': task.created_at,
                    'started_at': task.started_at,
                    'completed_at': task.completed_at,
                    'worker_id': task.worker_id,
                    'result': task.result,
                    'error': task.error,
                    'retries_left': task.retries_left
                }
            return None

    def get_system_status(self) -> Dict[str, Any]:
        """Get overall system status and metrics"""
        with self.lock:
            return {
                'workers_total': len(self.workers),
                'workers_running': len([p for p in self.worker_processes.values() if p.is_alive()]),
                'tasks_pending': len(self.pending_tasks),
                'tasks_running': len(self.running_tasks),
                'tasks_completed': len(self.completed_tasks),
                'tasks_failed': len(self.failed_tasks),
                'metrics': self.metrics.copy()
            }

    def wait_for_task(self, task_id: str, timeout: Optional[float] = None) -> Any:
        """Wait for a specific task to complete and return its result"""
        start_time = time.time()
        
        while True:
            status = self.get_task_status(task_id)
            if not status:
                raise ValueError(f"Task {task_id} not found")
            
            if status['status'] in [TaskStatus.COMPLETED.value, TaskStatus.FAILED.value, 
                                  TaskStatus.CANCELLED.value, TaskStatus.TIMEOUT.value]:
                if status['status'] == TaskStatus.COMPLETED.value:
                    return status['result']
                elif status['status'] == TaskStatus.FAILED.value:
                    raise Exception(f"Task failed: {status['error']}")
                elif status['status'] == TaskStatus.CANCELLED.value:
                    raise Exception("Task was cancelled")
                elif status['status'] == TaskStatus.TIMEOUT.value:
                    raise Exception("Task timed out")
            
            if timeout and (time.time() - start_time) > timeout:
                raise TimeoutError(f"Wait for task {task_id} timed out")
            
            time.sleep(0.1)

In [18]:
def main():
    """Example demonstrating the distributed task scheduler"""
    
    # Initialize scheduler
    scheduler = DistributedTaskScheduler(num_workers=2)
    scheduler.start()
    
    try:
        # Submit independent tasks
        task1 = scheduler.submit_task(long_running_task, (2, "task1"), 
                                    priority=TaskPriority.HIGH)
        task2 = scheduler.submit_task(computation_task, (5, 10),
                                    priority=TaskPriority.NORMAL)
        
        # Submit task with dependencies
        task3 = scheduler.submit_task(long_running_task, (1, "task3"),
                                    dependencies=[task1, task2],
                                    priority=TaskPriority.NORMAL)
        
        # Submit task that will fail and retry
        task4 = scheduler.submit_task(failing_task, retries=2,
                                    priority=TaskPriority.LOW)
        
        # Submit task with timeout
        task5 = scheduler.submit_task(long_running_task, (10, "task5"),
                                    timeout=3.0, priority=TaskPriority.NORMAL)
        
        # Submit data processing task
        task6 = scheduler.submit_task(data_processing_task, ([1, 2, 3, 4, 5],),
                                    priority=TaskPriority.HIGH)
        
        print("Submitted tasks:")
        print(f"Task1 (high priority): {task1}")
        print(f"Task2 (normal): {task2}")
        print(f"Task3 (depends on 1,2): {task3}")
        print(f"Task4 (will fail, retries): {task4}")
        print(f"Task5 (will timeout): {task5}")
        print(f"Task6 (data processing): {task6}")
        
        # Monitor progress
        for i in range(30):
            status = scheduler.get_system_status()
            print(f"\n--- Iteration {i+1} ---")
            print(f"Pending: {status['tasks_pending']}, "
                  f"Running: {status['tasks_running']}, "
                  f"Completed: {status['tasks_completed']}, "
                  f"Failed: {status['tasks_failed']}")
            
            # Check specific tasks
            for task_id in [task1, task2, task3, task6]:
                task_status = scheduler.get_task_status(task_id)
                if task_status:
                    print(f"  {task_id}: {task_status['status']}")
            
            time.sleep(1)
            
            # Check if main tasks are done
            task3_status = scheduler.get_task_status(task3)
            if task3_status and task3_status['status'] == TaskStatus.COMPLETED.value:
                print(f"\nTask3 completed with result: {task3_status['result']}")
                break
        
        # Scale workers dynamically
        print("\nScaling workers to 4...")
        scheduler.scale_workers(4)
        time.sleep(2)
        
        print("Scaling workers to 1...")
        scheduler.scale_workers(1)
        time.sleep(2)
        
        # Get final status
        final_status = scheduler.get_system_status()
        print("\n=== Final System Status ===")
        for key, value in final_status.items():
            if key == 'metrics':
                print("Metrics:")
                for metric, count in value.items():
                    print(f"  {metric}: {count}")
            else:
                print(f"{key}: {value}")
                
        # Try to get results for completed tasks
        try:
            result1 = scheduler.wait_for_task(task1, timeout=1)
            print(f"\nTask1 result: {result1}")
        except Exception as e:
            print(f"Could not get Task1 result: {e}")
            
        try:
            result6 = scheduler.wait_for_task(task6, timeout=1)
            print(f"Task6 result: {result6}")
        except Exception as e:
            print(f"Could not get Task6 result: {e}")
            
    finally:
        scheduler.stop()
        print("\nScheduler stopped")



In [19]:
if __name__ == "__main__":
    main()

2025-10-06 11:54:34,679 - TaskScheduler - INFO - Started worker worker-81d75735
2025-10-06 11:54:34,682 - TaskScheduler - INFO - Worker worker-81d75735 started
2025-10-06 11:54:34,696 - TaskScheduler - INFO - Started worker worker-cbf776c6
2025-10-06 11:54:34,701 - TaskScheduler - INFO - Task scheduler started
2025-10-06 11:54:34,704 - TaskScheduler - INFO - Submitted task task-3dd57bf0 with priority HIGH
2025-10-06 11:54:34,706 - TaskScheduler - INFO - Submitted task task-8a682add with priority NORMAL
2025-10-06 11:54:34,708 - TaskScheduler - INFO - Submitted task task-9844d296 waiting for dependencies
2025-10-06 11:54:34,709 - TaskScheduler - INFO - Submitted task task-109595f5 with priority LOW
2025-10-06 11:54:34,711 - TaskScheduler - INFO - Submitted task task-eb033475 with priority NORMAL
2025-10-06 11:54:34,712 - TaskScheduler - INFO - Submitted task task-3fee1860 with priority HIGH
2025-10-06 11:54:34,722 - TaskScheduler - INFO - Scheduled task task-3dd57bf0 to worker
2025-10-0

Task task1 running for 2 seconds


2025-10-06 11:54:34,737 - TaskScheduler - INFO - Worker worker-cbf776c6 executing task task-3fee1860
2025-10-06 11:54:34,744 - TaskScheduler - INFO - Worker worker-cbf776c6 completed task task-3fee1860
2025-10-06 11:54:34,835 - TaskScheduler - INFO - Task task-3fee1860 completed successfully
2025-10-06 11:54:34,836 - TaskScheduler - INFO - Scheduled task task-8a682add to worker
2025-10-06 11:54:34,838 - TaskScheduler - INFO - Worker worker-cbf776c6 executing task task-8a682add
2025-10-06 11:54:34,854 - TaskScheduler - INFO - Worker worker-cbf776c6 completed task task-8a682add


Submitted tasks:
Task1 (high priority): task-3dd57bf0
Task2 (normal): task-8a682add
Task3 (depends on 1,2): task-9844d296
Task4 (will fail, retries): task-109595f5
Task5 (will timeout): task-eb033475
Task6 (data processing): task-3fee1860

--- Iteration 1 ---
Pending: 5, Running: 0, Completed: 0, Failed: 0
  task-3dd57bf0: running
  task-8a682add: pending
  task-9844d296: pending
  task-3fee1860: running


2025-10-06 11:54:34,939 - TaskScheduler - INFO - Task task-8a682add completed successfully
2025-10-06 11:54:34,941 - TaskScheduler - INFO - Scheduled task task-eb033475 to worker
2025-10-06 11:54:34,941 - TaskScheduler - INFO - Worker worker-cbf776c6 executing task task-eb033475


Task task5 running for 10 seconds

--- Iteration 2 ---
Pending: 1, Running: 2, Completed: 2, Failed: 0
  task-3dd57bf0: running
  task-8a682add: completed
  task-9844d296: pending
  task-3fee1860: completed


2025-10-06 11:54:36,731 - TaskScheduler - INFO - Worker worker-81d75735 completed task task-3dd57bf0
2025-10-06 11:54:36,746 - TaskScheduler - INFO - Task task-3dd57bf0 completed successfully
2025-10-06 11:54:36,747 - TaskScheduler - INFO - Task task-9844d296 dependencies satisfied, queued for execution
2025-10-06 11:54:36,748 - TaskScheduler - INFO - Scheduled task task-9844d296 to worker
2025-10-06 11:54:36,748 - TaskScheduler - INFO - Worker worker-81d75735 executing task task-9844d296


Task task3 running for 1 seconds

--- Iteration 3 ---
Pending: 1, Running: 2, Completed: 2, Failed: 0
  task-3dd57bf0: running
  task-8a682add: completed
  task-9844d296: pending
  task-3fee1860: completed


2025-10-06 11:54:37,793 - TaskScheduler - INFO - Worker worker-81d75735 completed task task-9844d296
2025-10-06 11:54:37,857 - TaskScheduler - INFO - Task task-9844d296 completed successfully
2025-10-06 11:54:37,858 - TaskScheduler - INFO - Scheduled task task-109595f5 to worker
2025-10-06 11:54:37,859 - TaskScheduler - INFO - Worker worker-81d75735 executing task task-109595f5
2025-10-06 11:54:37,863 - TaskScheduler - ERROR - Worker worker-81d75735 failed task task-109595f5: Failed to execute task task-109595f5: This task is designed to fail



--- Iteration 4 ---
Pending: 1, Running: 2, Completed: 3, Failed: 0
  task-3dd57bf0: completed
  task-8a682add: completed
  task-9844d296: running
  task-3fee1860: completed


2025-10-06 11:54:37,961 - TaskScheduler - INFO - Retrying task task-109595f5, 1 retries left
2025-10-06 11:54:37,964 - TaskScheduler - INFO - Scheduled task task-109595f5 to worker
2025-10-06 11:54:37,964 - TaskScheduler - INFO - Worker worker-81d75735 executing task task-109595f5
2025-10-06 11:54:37,968 - TaskScheduler - ERROR - Worker worker-81d75735 failed task task-109595f5: Failed to execute task task-109595f5: This task is designed to fail
2025-10-06 11:54:38,066 - TaskScheduler - INFO - Retrying task task-109595f5, 0 retries left
2025-10-06 11:54:38,067 - TaskScheduler - INFO - Scheduled task task-109595f5 to worker
2025-10-06 11:54:38,067 - TaskScheduler - INFO - Worker worker-81d75735 executing task task-109595f5
2025-10-06 11:54:38,074 - TaskScheduler - ERROR - Worker worker-81d75735 failed task task-109595f5: Failed to execute task task-109595f5: This task is designed to fail
2025-10-06 11:54:38,169 - TaskScheduler - ERROR - Task task-109595f5 failed: Failed to execute task 


Task3 completed with result: Task task3 completed after 1s

Scaling workers to 4...


2025-10-06 11:54:38,742 - TaskScheduler - INFO - Started worker worker-d608ce94
2025-10-06 11:54:38,751 - TaskScheduler - INFO - Started worker worker-8eb6c89f
2025-10-06 11:54:38,747 - TaskScheduler - INFO - Worker worker-d608ce94 started
2025-10-06 11:54:38,761 - TaskScheduler - INFO - Scaled up to 4 workers
2025-10-06 11:54:38,759 - TaskScheduler - INFO - Worker worker-8eb6c89f started
2025-10-06 11:54:40,767 - TaskScheduler - INFO - Scaled down to 1 workers
2025-10-06 11:54:40,768 - TaskScheduler - INFO - Worker worker-8eb6c89f stopped
2025-10-06 11:54:40,769 - TaskScheduler - INFO - Worker worker-81d75735 stopped
2025-10-06 11:54:40,769 - TaskScheduler - INFO - Worker worker-d608ce94 stopped


Scaling workers to 1...


2025-10-06 11:54:40,787 - TaskScheduler - INFO - Started worker worker-b695b129
2025-10-06 11:54:40,805 - TaskScheduler - INFO - Started worker worker-10ef140c
2025-10-06 11:54:40,800 - TaskScheduler - INFO - Worker worker-b695b129 started
2025-10-06 11:54:40,811 - TaskScheduler - INFO - Worker worker-10ef140c started
2025-10-06 11:54:40,923 - TaskScheduler - INFO - Started worker worker-f96a2d87
2025-10-06 11:54:40,929 - TaskScheduler - INFO - Worker worker-f96a2d87 started
2025-10-06 11:54:42,770 - TaskScheduler - INFO - Worker worker-b695b129 stopped



=== Final System Status ===
workers_total: 4
workers_running: 4
tasks_pending: 0
tasks_running: 1
tasks_completed: 4
tasks_failed: 1
Metrics:
  tasks_completed: 4
  tasks_failed: 1
  tasks_cancelled: 0
  tasks_timed_out: 0
  workers_created: 7
  workers_failed: 3

Task1 result: Task task1 completed after 2s
Task6 result: 3.0


2025-10-06 11:54:47,774 - TaskScheduler - INFO - Worker worker-f96a2d87 stopped
2025-10-06 11:54:47,774 - TaskScheduler - INFO - Worker worker-10ef140c stopped
2025-10-06 11:54:47,786 - TaskScheduler - INFO - Task scheduler stopped



Scheduler stopped
