In [1]:
import json
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from analysis_utils import *
import json

In [24]:
def _read_json(path):
    """Parse the JSON document stored at ``path`` and return the result."""
    with open(path) as handle:
        return json.load(handle)


# Seeds of the runs stored under output/<strategy>/<seed>/.
seeds = [0, 1, 2, 42, 1234]

# Random Random: one pipelines.json per seed, kept in seed order
random_random = [
    _read_json(f'output/random_random/{seed}/pipelines.json') for seed in seeds
]

# Fifo Random: one pipelines.json per seed, kept in seed order
fifo_random = [
    _read_json(f'output/fifo_random/{seed}/pipelines.json') for seed in seeds
]

# Fifo Round Robin: a single result file
fifo_rr = _read_json('output/fifo_rr/pipelines.json')

# Kubernetes: result files are numbered run_1 .. run_<total_runs>; each one is
# passed through kfp_get_runs (from analysis_utils) before being stored.
total_runs = 5
kubernetes = [
    kfp_get_runs(_read_json(f'output/kfp/run_{run}.json'))
    for run in range(1, total_runs + 1)
]

# Proposed: a single result file
proposed = _read_json('output/proposed/pipelines.json')

### Total execution time

In [25]:
# Total execution time per strategy. Multi-run strategies go through the
# *_multiple helpers with std=False, so each entry is a single number.
total_exec_times = {
    "random-random": total_exec_time_multiple(random_random, std=False),
    "FCFS-random": total_exec_time_multiple(fifo_random, std=False),
    "FCFS-RR": total_exec_time(fifo_rr),
    "kubernetes-scheduler": kfp_total_exec_time_multiple(kubernetes, std=False),
    "proposed": total_exec_time(proposed),
}

# Both helpers take "proposed" as the reference strategy.
reduction_percs = time_reduced_perc(total_exec_times, "proposed")
reduction_ratios = time_reduced_ratio(total_exec_times, "proposed")

# Each report is a heading line followed by the dict on its own line.
print("Total Execution Times:", total_exec_times, sep="\n")
print("\nReduction Percentages:", reduction_percs, sep="\n")
print("\nReduction Ratios:", reduction_ratios, sep="\n")

Total Execution Times:
{'random-random': 859.4, 'FCFS-random': 890.8, 'FCFS-RR': 859, 'kubernetes-scheduler': 404.2, 'proposed': 285}

Reduction Percentages:
{'random-random': -66.84, 'FCFS-random': -68.01, 'FCFS-RR': -66.82, 'kubernetes-scheduler': -29.49}

Reduction Ratios:
{'random-random': 3.02, 'FCFS-random': 3.13, 'FCFS-RR': 3.01, 'kubernetes-scheduler': 1.42}


In [26]:
# Same table as before, but with std=True for the multi-run strategies.
# Judging by the printed output those entries become (value, std) tuples,
# while the single-run strategies stay plain numbers.
total_exec_times = {}
total_exec_times["random-random"] = total_exec_time_multiple(random_random, std=True)
total_exec_times["FCFS-random"] = total_exec_time_multiple(fifo_random, std=True)
total_exec_times["FCFS-RR"] = total_exec_time(fifo_rr)
total_exec_times["kubernetes-scheduler"] = kfp_total_exec_time_multiple(kubernetes, std=True)
total_exec_times["proposed"] = total_exec_time(proposed)

print("\nTotal Execution Times with Standard Deviation:", total_exec_times, sep="\n")


Total Execution Times with Standard Deviation:
{'random-random': (859.4, 80.24), 'FCFS-random': (890.8, 181.98), 'FCFS-RR': 859, 'kubernetes-scheduler': (404.2, 4.31), 'proposed': 285}


In [27]:
# Split every entry into a bar height and an optional error-bar size.
# Tuple entries carry (value, std); plain numbers get no error bar.
bar_heights = []
bar_errors = []
for entry in total_exec_times.values():
    if isinstance(entry, tuple):
        bar_heights.append(round(entry[0], 1))
        bar_errors.append(round(entry[1], 1))
    else:
        bar_heights.append(round(entry, 1))
        bar_errors.append(None)

fig = px.bar(
    x=total_exec_times.keys(),
    y=bar_heights,
    error_y=bar_errors,
    width=800,
    height=600,
)

# Colours are positional and follow the insertion order of total_exec_times:
# the first three strategies share one colour, the last two get their own.
# NOTE(review): no `text` is passed to px.bar, so textposition/textfont_size
# look like they have no visible effect; confirm whether labels were intended.
fig.update_traces(
    marker_color=["#626efb", "#626efb", "#626efb", "#fca05b", "#36cc97"],
    textposition='outside',
    textfont_size=14,
)

# Both axes share the same plain look: black axis line, no grid.
plain_axis = {
    "showline": True,
    "showgrid": False,
    "linecolor": 'black',
    "tickfont": {"color": 'black'},
}

fig.update_layout(
    xaxis_title='Strategy',
    yaxis_title='Total Execution Time (s)',
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    xaxis=dict(plain_axis),
    yaxis=dict(plain_axis),
)

fig.show()
fig.write_image("plots/total_exec_time.pdf", engine="kaleido")

### Pipeline execution time

In [28]:
# Per-pipeline execution times for each strategy. Insertion order is kept
# because it decides the order of the strategies in the grouped chart.
exec_times = {}
exec_times["random-random"] = pipeline_exec_times_multiple(random_random)
exec_times["FCFS-random"] = pipeline_exec_times_multiple(fifo_random)
exec_times["FCFS-RR"] = pipeline_exec_times(fifo_rr)
exec_times["kubernetes"] = kfp_pipeline_exec_times_multiple(kubernetes)
exec_times["proposed"] = pipeline_exec_times(proposed)

In [29]:
# Flatten {strategy: {pipeline: time}} into one row per (strategy, pipeline).
records = [
    {"strategy": strategy, "pipeline": pipeline, "execution_time": value}
    for strategy, pipelines in exec_times.items()
    for pipeline, value in pipelines.items()
]
df = pd.DataFrame(records)

# One fixed colour per strategy name.
custom_colors = dict([
    ("random-random", "#626efb"),
    ("FCFS-random", "#f1543b"),
    ("FCFS-RR", "#aa64fb"),
    ("kubernetes", "#fca05b"),
    ("proposed", "#36cc97"),
])

# Bars are grouped per pipeline, with one bar per strategy inside each group.
fig = px.bar(
    df,
    x="pipeline",
    y="execution_time",
    color="strategy",
    color_discrete_map=custom_colors,
    barmode="group",
    height=500,
    width=1000,
)

# Horizontal, untitled legend anchored by its bottom-right corner at the
# top-right corner of the plot area.
legend_style = dict(
    title_text='',
    orientation="h",
    yanchor="bottom",
    y=1,
    xanchor="right",
    x=1,
)

# Shared axis look; only the y axis draws (light grey) grid lines.
axis_base = dict(showline=True, linecolor='black', tickfont=dict(color='black'))

fig.update_layout(
    xaxis_title="Pipeline",
    yaxis_title="Execution Time (s)",
    bargap=0.3,
    legend=legend_style,
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    xaxis=dict(axis_base, showgrid=False),
    yaxis=dict(axis_base, showgrid=True, gridcolor='#dedede'),
)

fig.show()
fig.write_image("plots/pipeline_exec_time.pdf", engine="kaleido")

### Average waiting time

In [30]:
# Average pipeline waiting time per strategy (no kubernetes entry here).
# With std=False every entry is a single number.
avg_wait_times = {
    "random-random": pipeline_wait_times_avg_multiple(experiments=random_random, std=False),
    "FCFS-random": pipeline_wait_times_avg_multiple(experiments=fifo_random, std=False),
    "FCFS-RR": pipeline_wait_times_avg(pipelines=fifo_rr),
    "proposed": pipeline_wait_times_avg(pipelines=proposed),
}

# Both helpers take "proposed" as the reference strategy.
reduction_percs = time_reduced_perc(avg_wait_times, "proposed")
reduction_ratios = time_reduced_ratio(avg_wait_times, "proposed")

# Each report is a heading line followed by the dict on its own line.
print("\nAverage Wait Times:", avg_wait_times, sep="\n")
print("\nReduction Percentages:", reduction_percs, sep="\n")
print("\nReduction Ratios:", reduction_ratios, sep="\n")


Average Wait Times:
{'random-random': 245.46, 'FCFS-random': 281.92, 'FCFS-RR': 258.1, 'proposed': 54.3}

Reduction Percentages:
{'random-random': -77.88, 'FCFS-random': -80.74, 'FCFS-RR': -78.96}

Reduction Ratios:
{'random-random': 4.52, 'FCFS-random': 5.19, 'FCFS-RR': 4.75}


In [31]:
# Same table with std=True for the multi-seed strategies. Judging by the
# printed output those entries become (value, std) tuples.
avg_wait_times = {}
avg_wait_times["random-random"] = pipeline_wait_times_avg_multiple(experiments=random_random, std=True)
avg_wait_times["FCFS-random"] = pipeline_wait_times_avg_multiple(experiments=fifo_random, std=True)
avg_wait_times["FCFS-RR"] = pipeline_wait_times_avg(pipelines=fifo_rr)
avg_wait_times["proposed"] = pipeline_wait_times_avg(pipelines=proposed)

print("\nAverage Wait Times with Standard Deviation:", avg_wait_times, sep="\n")


Average Wait Times with Standard Deviation:
{'random-random': (245.46, 41.03), 'FCFS-random': (281.92, 76.86), 'FCFS-RR': 258.1, 'proposed': 54.3}


In [32]:
# Split every entry into a bar height and an optional error-bar size.
# Tuple entries carry (value, std); plain numbers get no error bar.
wait_heights = []
wait_errors = []
for wt in avg_wait_times.values():
    has_std = isinstance(wt, tuple)
    wait_heights.append(wt[0] if has_std else wt)
    wait_errors.append(wt[1] if has_std else None)

fig = px.bar(
    x=avg_wait_times.keys(),
    y=wait_heights,
    error_y=wait_errors,
    width=800,
    height=600,
)

# Colours are positional and follow the insertion order of avg_wait_times:
# the first three strategies share one colour, the last one gets its own.
fig.update_traces(
    marker_color=["#626efb", "#626efb", "#626efb", "#36cc97"],
    textposition='outside',
    textfont_size=14,
)

# Both axes share the same plain look: black axis line, no grid.
plain_axis = {
    "showline": True,
    "showgrid": False,
    "linecolor": 'black',
    "tickfont": {"color": 'black'},
}

fig.update_layout(
    xaxis_title='Strategy',
    yaxis_title='Average Wait Time (s)',
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    xaxis=dict(plain_axis),
    yaxis=dict(plain_axis),
)

fig.show()
fig.write_image("plots/avg_wait_time.pdf", engine="kaleido")

### Pipeline waiting time

In [33]:
# Per-pipeline waiting times for each strategy. Insertion order is kept
# because it decides the order of the strategies in the grouped chart.
wait_times = {}
wait_times["random-random"] = pipeline_wait_times_multiple(experiments=random_random)
wait_times["FCFS-random"] = pipeline_wait_times_multiple(experiments=fifo_random)
wait_times["FCFS-RR"] = pipeline_wait_times(pipelines=fifo_rr)
wait_times["proposed"] = pipeline_wait_times(pipelines=proposed)

In [34]:
# Flatten {strategy: {pipeline: time}} into one row per (strategy, pipeline).
records = [
    {"strategy": strategy, "pipeline": pipeline, "waiting_time": value}
    for strategy, pipelines in wait_times.items()
    for pipeline, value in pipelines.items()
]
df = pd.DataFrame(records)

# One fixed colour per strategy name. "kubernetes" stays in the map even
# though wait_times, as built in this notebook, has no such entry.
custom_colors = dict([
    ("random-random", "#626efb"),
    ("FCFS-random", "#f1543b"),
    ("FCFS-RR", "#aa64fb"),
    ("kubernetes", "#fca05b"),
    ("proposed", "#36cc97"),
])

# Bars are grouped per pipeline, with one bar per strategy inside each group.
fig = px.bar(
    df,
    x="pipeline",
    y="waiting_time",
    color="strategy",
    color_discrete_map=custom_colors,
    barmode="group",
    height=500,
    width=1000,
)

# Horizontal, untitled legend anchored by its bottom-right corner at the
# top-right corner of the plot area.
legend_style = dict(
    title_text='',
    orientation="h",
    yanchor="bottom",
    y=1,
    xanchor="right",
    x=1,
)

# Shared axis look; only the y axis draws (light grey) grid lines.
axis_base = dict(showline=True, linecolor='black', tickfont=dict(color='black'))

fig.update_layout(
    xaxis_title="Pipeline",
    yaxis_title="Waiting Time (s)",
    bargap=0.3,
    legend=legend_style,
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    xaxis=dict(axis_base, showgrid=False),
    yaxis=dict(axis_base, showgrid=True, gridcolor='#dedede'),
)

fig.show()
fig.write_image("plots/pipeline_wait_time.pdf", engine="kaleido")

### Running and waiting pipelines over time

In [35]:
def plot_n_pipelines(pipeline_numbers, time_windows, title, filename, comparison_line=None):
    """Plot running and waiting pipeline counts against elapsed time.

    The figure is displayed and also written to ``plots/<filename>.pdf``
    using the kaleido engine.

    Args:
        pipeline_numbers: Table indexed by column name that provides
            "elapsed_time", "running_pipelines" and "waiting_pipelines".
        time_windows: Iterable of x positions; each one gets a green dashed
            vertical line labelled S1, S2, ... in iteration order.
        title: Accepted for call compatibility but currently not rendered;
            the figure is produced without a title.
        filename: File name stem of the exported PDF.
        comparison_line: Optional x position of a red dotted vertical line
            labelled "Proposed". Nothing is drawn when it is None.
    """
    fig = go.Figure()

    # One filled line per series; "Running" is added first, then "Waiting".
    series_specs = (
        ("running_pipelines", 'Running', 'blue'),
        ("waiting_pipelines", 'Waiting', 'orange'),
    )
    for column, label, colour in series_specs:
        fig.add_trace(go.Scatter(
            x=pipeline_numbers["elapsed_time"],
            y=pipeline_numbers[column],
            mode='lines',
            name=label,
            fill='tozeroy',
            line=dict(color=colour)
        ))

    # Markers for the entries of time_windows, numbered from 1.
    for number, window_time in enumerate(time_windows, start=1):
        fig.add_vline(
            x=window_time,
            line=dict(color="green", dash="dash"),
            opacity=1,
            annotation_text=f"S{number}",
            annotation_position="top",
            annotation=dict(font=dict(size=12, color="green"))
        )

    # Optional reference marker.
    if comparison_line is not None:
        fig.add_vline(
            x=comparison_line,
            name="proposed",
            line=dict(color="red", dash="dot"),
            opacity=1,
            annotation_text="Proposed",
            annotation_position="top",
            annotation=dict(font=dict(size=12, color="red"))
        )

    # Horizontal, untitled legend anchored by its bottom-right corner at the
    # top-right corner of the plot area.
    legend_style = dict(
        title_text='',
        orientation="h",
        yanchor="bottom",
        y=1,
        xanchor="right",
        x=1
    )

    # Shared axis look; the y axis adds unit tick spacing and a light grid.
    axis_base = dict(showline=True, linecolor='black', tickfont=dict(color='black'))

    fig.update_layout(
        xaxis_title="Elapsed Time (s)",
        yaxis_title="Number of Pipelines",
        legend=legend_style,
        width=1000,
        height=600,
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        xaxis=dict(axis_base, showgrid=False),
        yaxis=dict(axis_base, dtick=1, showgrid=True, gridcolor='#dedede')
    )

    fig.show()
    fig.write_image(f"plots/{filename}.pdf", engine="kaleido")

#### Proposed

In [36]:
pipeline_numbers = pd.read_csv("output/proposed/n_pipelines_m2.csv")

# Elapsed time relative to the earliest logged timestamp, truncated to whole
# units. Computed as one vectorised operation instead of a Python-level loop.
start_timestamp = pipeline_numbers["timestamp"].min()
pipeline_numbers["elapsed_time"] = (
    pipeline_numbers["timestamp"] - start_timestamp
).astype("int64")

# The CSV mixes two row types: "new_window" rows supply the vertical marker
# positions, "update" rows supply the running/waiting counts. A single .loc
# lookup replaces the chained df[mask]["col"] indexing.
time_windows = pipeline_numbers.loc[
    pipeline_numbers["type"] == "new_window", "elapsed_time"
]
pipeline_numbers = pipeline_numbers.loc[pipeline_numbers["type"] == "update"]

plot_n_pipelines(
    pipeline_numbers,
    time_windows,
    title="Number of Pipelines Over Time (Proposed Strategy)",
    filename="n_pipelines_proposed"
)

#### FCFS-RR

In [37]:
pipeline_numbers = pd.read_csv("output/fifo_rr/n_pipelines_m2.csv")

# Elapsed time relative to the earliest logged timestamp, truncated to whole
# units. Computed as one vectorised operation instead of a Python-level loop.
start_timestamp = pipeline_numbers["timestamp"].min()
pipeline_numbers["elapsed_time"] = (
    pipeline_numbers["timestamp"] - start_timestamp
).astype("int64")

# The CSV mixes two row types: "new_window" rows supply the vertical marker
# positions, "update" rows supply the running/waiting counts. A single .loc
# lookup replaces the chained df[mask]["col"] indexing.
time_windows = pipeline_numbers.loc[
    pipeline_numbers["type"] == "new_window", "elapsed_time"
]
pipeline_numbers = pipeline_numbers.loc[pipeline_numbers["type"] == "update"]

plot_n_pipelines(
    pipeline_numbers,
    time_windows,
    title="Number of Pipelines Over Time (FCFS Round-Robin Strategy)",
    filename="n_pipelines_fifo_rr",
    # Hard-coded reference: equals the 'proposed' total printed by the final
    # comparison cell of this notebook. Update it if that value changes.
    comparison_line=813
)

#### FCFS-random

In [38]:
# Only the seed-42 run is plotted for this strategy.
pipeline_numbers = pd.read_csv("output/fifo_random/42/n_pipelines_m2.csv")

# Elapsed time relative to the earliest logged timestamp, truncated to whole
# units. Computed as one vectorised operation instead of a Python-level loop.
start_timestamp = pipeline_numbers["timestamp"].min()
pipeline_numbers["elapsed_time"] = (
    pipeline_numbers["timestamp"] - start_timestamp
).astype("int64")

# The CSV mixes two row types: "new_window" rows supply the vertical marker
# positions, "update" rows supply the running/waiting counts. A single .loc
# lookup replaces the chained df[mask]["col"] indexing.
time_windows = pipeline_numbers.loc[
    pipeline_numbers["type"] == "new_window", "elapsed_time"
]
pipeline_numbers = pipeline_numbers.loc[pipeline_numbers["type"] == "update"]

plot_n_pipelines(
    pipeline_numbers,
    time_windows,
    title="Number of Pipelines Over Time (FCFS Random Strategy)",
    filename="n_pipelines_fifo_random",
    # Hard-coded reference: equals the 'proposed' total printed by the final
    # comparison cell of this notebook. Update it if that value changes.
    comparison_line=813
)

#### Random-random

In [39]:
# Only the seed-42 run is plotted for this strategy.
pipeline_numbers = pd.read_csv("output/random_random/42/n_pipelines_m2.csv")

# Elapsed time relative to the earliest logged timestamp, truncated to whole
# units. Computed as one vectorised operation instead of a Python-level loop.
start_timestamp = pipeline_numbers["timestamp"].min()
pipeline_numbers["elapsed_time"] = (
    pipeline_numbers["timestamp"] - start_timestamp
).astype("int64")

# The CSV mixes two row types: "new_window" rows supply the vertical marker
# positions, "update" rows supply the running/waiting counts. A single .loc
# lookup replaces the chained df[mask]["col"] indexing.
time_windows = pipeline_numbers.loc[
    pipeline_numbers["type"] == "new_window", "elapsed_time"
]
pipeline_numbers = pipeline_numbers.loc[pipeline_numbers["type"] == "update"]

plot_n_pipelines(
    pipeline_numbers,
    time_windows,
    title="Number of Pipelines Over Time (Random-Random Strategy)",
    filename="n_pipelines_random_random",
    # Hard-coded reference: equals the 'proposed' total printed by the final
    # comparison cell of this notebook. Update it if that value changes.
    comparison_line=813
)

### Total elapsed time comparison

In [40]:
# NOTE(review): this cell rebinds random_random, fifo_random, fifo_rr and
# proposed. The first two held one result per seed after the loading cell at
# the top of the notebook; from here on they hold a single seed-42 result, so
# the earlier analysis cells cannot be re-run without reloading first.

# Random Random (seed 42 only)
with open('output/random_random/42/pipelines_m2.json') as random_random_file:
    random_random = json.load(random_random_file)

# Fifo Random (seed 42 only)
with open('output/fifo_random/42/pipelines_m2.json') as fifo_random_file:
    fifo_random = json.load(fifo_random_file)

# Fifo Round Robin
with open('output/fifo_rr/pipelines_m2.json') as fifo_rr_file:
    fifo_rr = json.load(fifo_rr_file)

# Proposed
with open('output/proposed/pipelines_m2.json') as proposed_file:
    proposed = json.load(proposed_file)

In [41]:
# Total execution time of each single m2 run loaded in the previous cell.
total_exec_times = {}
total_exec_times["random-random"] = total_exec_time(random_random)
total_exec_times["FCFS-random"] = total_exec_time(fifo_random)
total_exec_times["FCFS-RR"] = total_exec_time(fifo_rr)
total_exec_times["proposed"] = total_exec_time(proposed)

# Both helpers take "proposed" as the reference strategy.
reduction_percs = time_reduced_perc(total_exec_times, "proposed")
reduction_ratios = time_reduced_ratio(total_exec_times, "proposed")

# One dict per line: totals, percentages, ratios.
print(total_exec_times, reduction_percs, reduction_ratios, sep="\n")

{'random-random': 1655, 'FCFS-random': 1810, 'FCFS-RR': 1405, 'proposed': 813}
{'random-random': -50.88, 'FCFS-random': -55.08, 'FCFS-RR': -42.14}
{'random-random': 2.04, 'FCFS-random': 2.23, 'FCFS-RR': 1.73}
