<a href="https://colab.research.google.com/github/paridhika/DDL/blob/main/DAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import csv
import re

class DAGNode:
    def __init__(self, task_name, job_name, start_time, end_time, status,cpu_avg,mem_avg,instance_num ):
        self.task_name = task_name
        self.job_name = job_name
        self.start_time = start_time
        self.end_time = end_time
        self.status = status
        self.dependencies = []
        self.cpu_avg = cpu_avg
        self.mem_avg = mem_avg
        self.instance_num  = instance_num

    def add_dependency(self, dependency_list):
        self.dependencies = dependency_list


class DAGJob:
    def __init__(self, job_name):
        self.job_name = job_name
        self.nodes = []
        self.dependency_matrix = None  # Will be populated during processing

    def add_node(self, node):
        self.nodes.append(node)

    def generate_dependency_matrix(self):
      num_nodes = len(self.nodes)
      self.dependency_matrix = [[0] * num_nodes for _ in range(num_nodes)]

      for node in self.nodes:
          # print(str(node.task_name) + " : " + node.dependencies)
          if len(node.dependencies) != 0:
              # print(node.dependencies)
              for dependency_task in node.dependencies:
                  # print(dependency_task)
                  self.dependency_matrix[int(node.task_name)-1][int(dependency_task)-1] = 1

    def display_nodes(self):
        for node in self.nodes:
            print(f"Task: {node.task_name}, Job: {node.job_name}, Start Time: {node.start_time}, End Time: {node.end_time}, Status: {node.status}")

    def display_dependency_matrix(self):
        for row in self.dependency_matrix:
            print(row)


def create_dags_from_csv(file_path):
    dag_jobs = {}

    with open(file_path, 'r') as csvfile:

        reader = csv.DictReader(csvfile)

        for row in reader:
            job_name = row['job_name']
            task_name = row['task_name']
            start_time = row['start_time']
            end_time = row['end_time']
            status = row['status']
            cpu_avg=row['plan_cpu']
            mem_avg=row['plan_mem']
            instance_num = row['instance_num']

            # Extract the first number using regular expression
            match = re.search(r'\d+', task_name)

            if match:
                first_number = int(match.group())

            node = DAGNode(first_number, job_name, start_time, end_time, status,cpu_avg,mem_avg,instance_num )

            if job_name not in dag_jobs:
                dag_jobs[job_name] = DAGJob(job_name)

            dag_jobs[job_name].add_node(node)

            # Extract dependencies from task_name
            dependencies = [part for part in task_name.split('_') if part.isdigit()]
            node.add_dependency(dependencies)

    return dag_jobs


# Example usage:
csv_file_path = 'tasks_11_to_50.csv'
dag_jobs = create_dags_from_csv(csv_file_path)



In [2]:
for key,values in dag_jobs.items():
    values.generate_dependency_matrix()

IndexError: ignored

In [3]:
!pip install graphviz

def matrix_to_dot(matrix, node_labels=None):
    """
    Convert an adjacency matrix to Graphviz DOT format.

    Parameters:
    - matrix: List of lists representing the adjacency matrix.
    - node_labels: Optional list of node labels.

    Returns:
    - dot_content: String containing the DOT representation.
    """
    num_nodes = len(matrix)

    # Create DOT header
    dot_content = "digraph MyDAG {\n"

    # Add nodes
    for node in range(num_nodes):
        label = str(node + 1) if node_labels is None else str(node_labels[node])
        dot_content += f'  {node + 1} [label="{label}"];\n'

    # Add edges
    for i in range(num_nodes):
        for j in range(num_nodes):
            if matrix[i][j] == 1:
                dot_content += f'  {i + 1} -> {j + 1};\n'

    # Close DOT file
    dot_content += "}"

    return dot_content





In [None]:
print(len(dag_jobs))
for key,values in dag_jobs.items():
    name = []

    for node in values.nodes:
        name.append(node.task_name)
    dot_content = matrix_to_dot(values.dependency_matrix,name)
    # Save DOT file
    with open(key+".dot", "w") as dot_file:
        dot_file.write(dot_content)

In [5]:
import os
from graphviz import Source

# Directory containing DOT files
dot_directory = "."

# Output directory for PNG images
png_output_directory = "./output"

# Ensure the output directory exists
os.makedirs(png_output_directory, exist_ok=True)

# Iterate over DOT files in the directory
for dot_filename in os.listdir(dot_directory):
    if dot_filename.endswith(".dot"):
        # Construct the full paths for DOT and PNG files
        dot_file_path = os.path.join(dot_directory, dot_filename)
        png_output_path = os.path.join(png_output_directory, os.path.splitext(dot_filename)[0])

        # Read the DOT file
        source = Source.from_file(dot_file_path, format="png")

        # Save the PNG image
        source.render(png_output_path, format="png", cleanup=True)

        print(f"PNG image created: {png_output_path}")



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
PNG image created: ./output/j_448146
PNG image created: ./output/j_36018
PNG image created: ./output/j_12471
PNG image created: ./output/j_429556
PNG image created: ./output/j_417208
PNG image created: ./output/j_271621
PNG image created: ./output/j_154532
PNG image created: ./output/j_273112
PNG image created: ./output/j_140896
PNG image created: ./output/j_186992
PNG image created: ./output/j_414675
PNG image created: ./output/j_203183
PNG image created: ./output/j_91270
PNG image created: ./output/j_227862
PNG image created: ./output/j_374234
PNG image created: ./output/j_417903
PNG image created: ./output/j_244882
PNG image created: ./output/j_90040
PNG image created: ./output/j_464962
PNG image created: ./output/j_80741
PNG image created: ./output/j_211203
PNG image created: ./output/j_407017
PNG image created: ./output/j_410264
PNG image created: ./output/j_333538
PNG image created: ./output/j_347903
PNG image creat