# 4.b. Pipeline Dependency

In [1]:
from collections import defaultdict
from queue import Queue

In [2]:
class TaskScheduler:
    """Order tasks so that each one runs after the tasks it depends on.

    ``relations`` is an iterable of ``(task, dependent)`` pairs, each meaning
    that ``task`` has to run before ``dependent``.
    """

    def __init__(self, relations):
        self.relations = relations
        self.dependencies = self._dependency_dict(relations)

    def _dependency_dict(self, relations):
        """Map every dependent task to the list of tasks it depends on."""
        dependencies = defaultdict(list)
        for task, dependent in relations:
            dependencies[dependent].append(task)
        return dependencies

    def _check_acyclic(self, start_tasks, goal_tasks):
        """Raise ``ValueError`` if the goals can reach a dependency cycle.

        Only tasks that are not in ``start_tasks`` are expanded, mirroring
        the traversal in ``get_task_flow``; a cycle that is cut by a start
        task is therefore not an error.
        """
        in_progress, done = 1, 2
        state = {}

        def pending_dependencies(task):
            if task in start_tasks:
                return iter(())
            return iter(self.dependencies.get(task, ()))

        for goal in goal_tasks:
            if goal in state:
                continue
            state[goal] = in_progress
            # explicit stack: long dependency chains must not hit the
            # interpreter's recursion limit
            stack = [(goal, pending_dependencies(goal))]
            while stack:
                task, pending = stack[-1]
                for dependency in pending:
                    seen = state.get(dependency)
                    if seen == in_progress:
                        raise ValueError(
                            f'circular dependency detected at task '
                            f'{dependency!r}'
                        )
                    if seen is None:
                        state[dependency] = in_progress
                        stack.append(
                            (dependency, pending_dependencies(dependency))
                        )
                        break
                else:
                    # every dependency of `task` has been fully explored
                    state[task] = done
                    stack.pop()

    def get_task_flow(self, start_tasks, goal_tasks):
        """Return a run order that reaches ``goal_tasks``.

        Args:
            start_tasks: tasks whose own dependencies are considered
                satisfied; they are scheduled but not expanded further.
            goal_tasks: tasks that have to be run in the end.

        Returns:
            A list of tasks in which every task appears after all of the
            dependencies that were scheduled for it.

        Raises:
            ValueError: if the dependencies reachable from ``goal_tasks``
                contain a cycle (previously this looped forever).
        """
        start_tasks = set(start_tasks)
        goal_tasks = list(goal_tasks)
        self._check_acyclic(start_tasks, goal_tasks)

        # dict used as an insertion-ordered set: popping and re-inserting a
        # key moves it to the end in O(1)
        task_flow = {}
        queue = Queue()

        for task in goal_tasks:
            queue.put(task)

        while not queue.empty():
            task = queue.get()

            # move a scheduled task up to run earlier
            # if an earlier dependency exists
            task_flow.pop(task, None)
            task_flow[task] = None

            # starting tasks already satisfy dependencies
            if task in start_tasks:
                continue

            # put dependencies of current task in queue; .get() avoids
            # inserting empty entries into the defaultdict on lookup
            for dependency in self.dependencies.get(task, ()):
                queue.put(dependency)

        return list(reversed(task_flow))


In [3]:
def read_relation_tuples(filepath):
    """Read ``a->b`` relations from a text file.

    Each non-blank line is split on ``'->'`` and every field is converted
    with ``int``; blank or whitespace-only lines (such as a trailing empty
    line) are skipped.

    Args:
        filepath: path of the relations file.

    Returns:
        A list of integer tuples, one per non-blank line, in file order.

    Raises:
        ValueError: if a field on a non-blank line is not an integer.
    """
    tuples = []
    with open(filepath, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # tolerate empty lines instead of failing on int('')
                continue
            tuples.append(tuple(map(int, line.split('->'))))

    return tuples

In [4]:
# Tasks whose dependencies are treated as already satisfied, and the tasks
# that must be run in the end.
starting_tasks = [73]
goal_tasks = [36]

# Load the relation pairs from disk and build the scheduler from them.
relations = read_relation_tuples('data/relations.txt')
ts = TaskScheduler(relations)

# Compute the run order for the goal tasks and print it.
task_flow = ts.get_task_flow(starting_tasks, goal_tasks)
print(f'Run Sequence: {task_flow}')

Run Sequence: [39, 73, 21, 100, 20, 41, 94, 112, 56, 97, 102, 36]
