In [None]:
import json
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from analysis import *

In [None]:
# Seed-named sub-directories holding the repeated runs of the randomised strategies
SEEDS = (0, 1, 2, 42, 1234)
# Suffixes of the Kubeflow result files (output/kfp/run_<id>.json)
KFP_RUN_IDS = (1, 2, 3, 4, 5)


def _load_json(path):
    """Return the parsed contents of the JSON file at `path`."""
    # Explicit UTF-8 so decoding does not depend on the platform's locale default.
    with open(path, encoding="utf-8") as f:
        return json.load(f)


# Random Random (different seeds)
random_random = [_load_json(f'output/random_random/{seed}/pipelines.json') for seed in SEEDS]
# Per-run names kept so any code that refers to an individual run still works
random_random_0, random_random_1, random_random_2, random_random_3, random_random_4 = random_random

# Fifo Random (different seeds)
fifo_random = [_load_json(f'output/fifo_random/{seed}/pipelines.json') for seed in SEEDS]
fifo_random_0, fifo_random_1, fifo_random_2, fifo_random_3, fifo_random_4 = fifo_random

# Fifo Round Robin (single run)
fifo_rr = _load_json('output/fifo_rr/pipelines.json')

# Kubernetes: load every raw result file first, then convert each one with kfp_get_runs
kfp_raw_runs = [_load_json(f'output/kfp/run_{run_id}.json') for run_id in KFP_RUN_IDS]
kfp_data_1, kfp_data_2, kfp_data_3, kfp_data_4, kfp_data_5 = kfp_raw_runs
kubernetes = [kfp_get_runs(raw_run) for raw_run in kfp_raw_runs]

# Proposed (single run)
proposed = _load_json('output/proposed/pipelines.json')

### Total execution time

In [None]:
# Total execution time keyed by strategy label. Strategies with several runs
# go through the *_multiple helpers (presumably aggregating across the runs;
# confirm in the analysis module).
total_exec_times = {}
total_exec_times["random-random"] = total_exec_time_multiple(random_random)
total_exec_times["fifo-random"] = total_exec_time_multiple(fifo_random)
total_exec_times["fifo-RR"] = total_exec_time(fifo_rr)
total_exec_times["kubernetes-scheduler"] = kfp_total_exec_time_multiple(kubernetes)
total_exec_times["proposed"] = total_exec_time(proposed)

# Both reduction helpers receive the full table plus the "proposed" key
reference_key = "proposed"
reduction_percs = time_reduced_perc(total_exec_times, reference_key)
reduction_ratios = time_reduced_ratio(total_exec_times, reference_key)

# Print the raw totals first, then the two reduction summaries
for summary in (total_exec_times, reduction_percs, reduction_ratios):
    print(summary)

In [None]:
# Bar chart: one bar per strategy showing its total execution time.
strategy_names = list(total_exec_times.keys())
strategy_totals = list(total_exec_times.values())

fig = px.bar(
    x=strategy_names,
    y=strategy_totals,
    width=800,
    height=600
)

# Highlight the proposed strategy in green and draw every other bar in blue.
# Deriving the colours from the keys keeps them aligned with the bars even if
# strategies are added, removed or reordered in total_exec_times.
bar_colors = [
    "#00CC96" if name == "proposed" else "#636EFA"
    for name in strategy_names
]

fig.update_traces(
    marker_color=bar_colors,
    text=strategy_totals,
    textposition='outside',
    textfont_size=15
)

fig.update_layout(
    title="Total Execution Time of Different Strategies",
    title_x=0.5,
    xaxis_title='Strategy',
    yaxis_title='Total Execution Time (s)'
)

fig.show()
fig.write_image("plots/total_exec_time.pdf", engine="kaleido")

### Pipeline execution time

In [None]:
# Per-pipeline execution times keyed by strategy label.
# NOTE(review): the total-execution-time cell labels the Kubernetes results
# "kubernetes-scheduler" while this cell uses "kubernetes"; confirm which
# label is intended so the figures agree.
exec_times = dict([
    ("random-random", pipeline_exec_times_multiple(random_random)),
    ("fifo-random", pipeline_exec_times_multiple(fifo_random)),
    ("fifo-RR", pipeline_exec_times(fifo_rr)),
    ("kubernetes", kfp_pipeline_exec_times_multiple(kubernetes)),
    ("proposed", pipeline_exec_times(proposed)),
])

In [None]:
# Flatten {strategy: {pipeline: execution_time}} into one record per bar
records = [
    {
        "strategy": strategy,
        "pipeline": pipeline,
        "execution_time": value
    }
    for strategy, pipelines in exec_times.items()
    for pipeline, value in pipelines.items()
]

df = pd.DataFrame(records)

# Grouped bar chart: one group per pipeline, one coloured bar per strategy
fig = px.bar(
    df,
    x="pipeline",
    y="execution_time",
    color="strategy",
    barmode="group",
    height=600,
    width=1600
)

fig.update_layout(
    title="Execution Time per Strategy for Each Pipeline",
    title_x=0.5,
    # The x axis carries the "pipeline" column; strategies are shown by colour
    xaxis_title="Pipeline",
    yaxis_title="Execution Time (s)",
    bargap=0.4
)

fig.show()
fig.write_image("plots/pipeline_exec_time.pdf", engine="kaleido")

### Average waiting time

In [None]:
# Average waiting time keyed by strategy label. Multi-run strategies are
# passed through the `experiments` keyword, single runs through `pipelines`.
avg_wait_times = {}
avg_wait_times["random-random"] = pipeline_wait_times_avg(experiments=random_random)
avg_wait_times["fifo-random"] = pipeline_wait_times_avg(experiments=fifo_random)
avg_wait_times["fifo-RR"] = pipeline_wait_times_avg(pipelines=fifo_rr)
avg_wait_times["proposed"] = pipeline_wait_times_avg(pipelines=proposed)

# Both reduction helpers receive the full table plus the "proposed" key
reference_key = "proposed"
reduction_percs = time_reduced_perc(avg_wait_times, reference_key)
reduction_ratios = time_reduced_ratio(avg_wait_times, reference_key)

# Print the raw averages first, then the two reduction summaries
for summary in (avg_wait_times, reduction_percs, reduction_ratios):
    print(summary)

In [None]:
# Bar chart: one bar per strategy showing its average waiting time.
strategy_names = list(avg_wait_times.keys())
strategy_averages = list(avg_wait_times.values())

fig = px.bar(
    x=strategy_names,
    y=strategy_averages,
    width=800,
    height=600
)

# Highlight the proposed strategy in green and draw every other bar in blue.
# Deriving the colours from the keys keeps them aligned with the bars even if
# strategies are added, removed or reordered in avg_wait_times.
bar_colors = [
    "#00CC96" if name == "proposed" else "#636EFA"
    for name in strategy_names
]

fig.update_traces(
    marker_color=bar_colors,
    text=strategy_averages,
    textposition='outside',
    textfont_size=15
)

fig.update_layout(
    title="Average Wait Time of Different Strategies",
    title_x=0.5,
    xaxis_title='Strategy',
    yaxis_title='Average Wait Time (s)'
)

fig.show()
fig.write_image("plots/avg_wait_time.pdf", engine="kaleido")

### Pipeline waiting time

In [None]:
# Per-pipeline waiting times keyed by strategy label. Multi-run strategies
# are passed through the `experiments` keyword, single runs through `pipelines`.
wait_times = dict([
    ("random-random", pipeline_wait_times_multiple(experiments=random_random)),
    ("fifo-random", pipeline_wait_times_multiple(experiments=fifo_random)),
    ("fifo-RR", pipeline_wait_times(pipelines=fifo_rr)),
    ("proposed", pipeline_wait_times(pipelines=proposed)),
])

In [None]:
# Flatten {strategy: {pipeline: waiting_time}} into one record per bar
records = [
    {
        "strategy": strategy,
        "pipeline": pipeline,
        "waiting_time": value
    }
    for strategy, pipelines in wait_times.items()
    for pipeline, value in pipelines.items()
]

df = pd.DataFrame(records)

# Grouped bar chart: one group per pipeline, one coloured bar per strategy
fig = px.bar(
    df,
    x="pipeline",
    y="waiting_time",
    color="strategy",
    barmode="group",
    height=600,
    width=1600
)

fig.update_layout(
    title="Waiting Time per Strategy for Each Pipeline",
    title_x=0.5,
    # The x axis carries the "pipeline" column; strategies are shown by colour
    xaxis_title="Pipeline",
    yaxis_title="Waiting Time (s)",
    bargap=0.4
)

fig.show()
fig.write_image("plots/pipeline_wait_time.pdf", engine="kaleido")

### Running and waiting pipelines over time

In [None]:
def plot_n_pipelines(pipeline_numbers, time_windows, title, filename):
    """Plot running and waiting pipeline counts against elapsed time and save the figure.

    Args:
        pipeline_numbers: Table indexable by the column names "elapsed_time",
            "running_pipelines" and "waiting_pipelines".
        time_windows: Iterable of x positions; a dashed green vertical line is
            drawn at each one.
        title: Title shown above the figure.
        filename: Base name (without extension) of the PDF written under plots/.
    """
    elapsed = pipeline_numbers["elapsed_time"]

    # (column, legend label, line colour) for each filled area, in drawing order
    area_specs = (
        ("running_pipelines", "Running", "blue"),
        ("waiting_pipelines", "Waiting", "orange"),
    )

    fig = go.Figure()
    for column, label, color in area_specs:
        area = go.Scatter(
            x=elapsed,
            y=pipeline_numbers[column],
            mode='lines',
            name=label,
            fill='tozeroy',
            line=dict(color=color),
        )
        fig.add_trace(area)

    # One dashed marker per entry of time_windows
    for window_start in time_windows:
        fig.add_vline(
            x=window_start,
            line=dict(color="green", dash="dash"),
            opacity=1,
        )

    layout_options = dict(
        title=title,
        title_x=0.5,
        xaxis_title="Elapsed Time (s)",
        yaxis_title="Number of Pipelines",
        legend_title="Pipeline State",
        width=1000,
        height=600,
        # A tick spacing of 1 on the y axis, which shows pipeline counts
        yaxis=dict(dtick=1),
    )
    fig.update_layout(**layout_options)

    fig.show()
    fig.write_image(f"plots/{filename}.pdf", engine="kaleido")

#### Proposed

In [None]:
# Pipeline-count log recorded for the proposed strategy
pipeline_numbers = pd.read_csv("output/proposed/n_pipelines_m.csv")

# Elapsed time = offset from the earliest timestamp, truncated to an integer
start_timestamp = pipeline_numbers["timestamp"].min()
pipeline_numbers["elapsed_time"] = (
    pipeline_numbers["timestamp"] - start_timestamp
).astype("int64")

# Rows typed "new_window" supply the vertical markers; rows typed "update"
# carry the running/waiting counts that are plotted
is_new_window = pipeline_numbers["type"] == "new_window"
is_update = pipeline_numbers["type"] == "update"
time_windows = pipeline_numbers.loc[is_new_window, "elapsed_time"]
pipeline_numbers = pipeline_numbers.loc[is_update]

plot_n_pipelines(
    pipeline_numbers,
    time_windows,
    title="Number of Pipelines Over Time (Proposed Strategy)",
    filename="n_pipelines_proposed",
)

#### Fifo-rr

In [None]:
# Pipeline-count log recorded for the FIFO round-robin strategy
pipeline_numbers = pd.read_csv("output/fifo_rr/n_pipelines_m.csv")

# Elapsed time = offset from the earliest timestamp, truncated to an integer
start_timestamp = pipeline_numbers["timestamp"].min()
pipeline_numbers["elapsed_time"] = (
    pipeline_numbers["timestamp"] - start_timestamp
).astype("int64")

# Rows typed "new_window" supply the vertical markers; rows typed "update"
# carry the running/waiting counts that are plotted
is_new_window = pipeline_numbers["type"] == "new_window"
is_update = pipeline_numbers["type"] == "update"
time_windows = pipeline_numbers.loc[is_new_window, "elapsed_time"]
pipeline_numbers = pipeline_numbers.loc[is_update]

plot_n_pipelines(
    pipeline_numbers,
    time_windows,
    title="Number of Pipelines Over Time (FIFO Round-Robin Strategy)",
    filename="n_pipelines_fifo_rr",
)

#### Fifo-random

In [None]:
# Pipeline-count log of the FIFO random run stored under the "42" directory
pipeline_numbers = pd.read_csv("output/fifo_random/42/n_pipelines_m.csv")

# Elapsed time = offset from the earliest timestamp, truncated to an integer
start_timestamp = pipeline_numbers["timestamp"].min()
pipeline_numbers["elapsed_time"] = (
    pipeline_numbers["timestamp"] - start_timestamp
).astype("int64")

# Rows typed "new_window" supply the vertical markers; rows typed "update"
# carry the running/waiting counts that are plotted
is_new_window = pipeline_numbers["type"] == "new_window"
is_update = pipeline_numbers["type"] == "update"
time_windows = pipeline_numbers.loc[is_new_window, "elapsed_time"]
pipeline_numbers = pipeline_numbers.loc[is_update]

plot_n_pipelines(
    pipeline_numbers,
    time_windows,
    title="Number of Pipelines Over Time (FIFO Random Strategy)",
    filename="n_pipelines_fifo_random",
)

#### Random-random

In [None]:
# Pipeline-count log of the random-random run stored under the "42" directory
pipeline_numbers = pd.read_csv("output/random_random/42/n_pipelines_m.csv")

# Elapsed time = offset from the earliest timestamp, truncated to an integer
start_timestamp = pipeline_numbers["timestamp"].min()
pipeline_numbers["elapsed_time"] = (
    pipeline_numbers["timestamp"] - start_timestamp
).astype("int64")

# Rows typed "new_window" supply the vertical markers; rows typed "update"
# carry the running/waiting counts that are plotted
is_new_window = pipeline_numbers["type"] == "new_window"
is_update = pipeline_numbers["type"] == "update"
time_windows = pipeline_numbers.loc[is_new_window, "elapsed_time"]
pipeline_numbers = pipeline_numbers.loc[is_update]

plot_n_pipelines(
    pipeline_numbers,
    time_windows,
    title="Number of Pipelines Over Time (Random-Random Strategy)",
    filename="n_pipelines_random_random",
)