In [None]:
import duckdb
import matplotlib.pyplot as plt
import os
import pandas as pd
import seaborn as sns
import sys

# Make the shared helper module importable: resolve '../utils/' (relative to the
# current working directory) to an absolute path and append it to sys.path only
# if it is not already there, so re-running this cell adds no duplicate entries.
directory_path = os.path.abspath(os.path.join('../utils/'))
if directory_path not in sys.path:
    sys.path.append(directory_path)
# Star import: every public name of `functions` lands in the notebook namespace.
# NOTE(review): presumably this is where retrieve_experiment_df, filterByEventIds
# and get_azure_metrics (used in later cells) come from -- confirm, and consider
# importing those names explicitly.
from functions import *

# Experiment parameters, need to be set before running this notebook.
# EXPERIMENT_ID and EXPERIMENT_START_TIME are parallel lists: entry i of one is
# paired with entry i of the other when the experiment data is retrieved, so
# both lists need to have the same length.
EXPERIMENT_ID = [""]
EXPERIMENT_START_TIME = [""]
# Event ids handed to filterByEventIds to restrict the retrieved data.
EVENT_IDS = [""]

# Path information for DuckDB.
DUCKDB_PATH = ""

# Azure metrics info (path handed to get_azure_metrics).
TF_PATH = ""
# Determines which metric to display, currently supported are io and api_calls.
# "api_calls" selects the API call count column; any other value selects the
# I/O volume column.
METRIC = ""

In [None]:
# --- Check input validity and create DB connection --- #

# Experiment ids and start times are paired by position, so both lists need to
# have the same length.
# NOTE: the statement must have the form `assert condition, message`. Wrapping
# the condition and the message in one pair of parentheses builds a non-empty
# tuple, which is always truthy, so the check could never fail.
assert len(EXPERIMENT_ID) == len(EXPERIMENT_START_TIME), (
    "The experiment id and start time vectors need to have the same length."
)

# Connect to database.
# read_only=True: the database file is opened without write access.
con = duckdb.connect(database=DUCKDB_PATH, read_only=True)

In [None]:
# --- Data manipulations --- #

# Retrieve relevant data from duckdb: one dataframe per experiment, where the
# experiment id at position idx is paired with the start time at position idx.
# Indexing (rather than zip) is deliberate: a start-time list that is too short
# raises an IndexError instead of silently dropping experiments.
experiment_frames = [
    retrieve_experiment_df(con, experiment_id, EXPERIMENT_START_TIME[idx])
    for idx, experiment_id in enumerate(EXPERIMENT_ID)
]

# Concatenate once instead of growing a dataframe inside the loop: repeated
# concat re-copies all previously collected rows on every iteration, and seeding
# it with an empty dataframe relies on deprecated pandas behaviour.
# pd.concat raises on an empty list, so fall back to an empty dataframe when no
# experiment ids are configured.
EXP_DATA = pd.concat(experiment_frames) if experiment_frames else pd.DataFrame()

# Remove superfluous entries from dataframe (only the configured event ids are
# passed on to the filter helper).
EXP_DATA = filterByEventIds(EXP_DATA, EVENT_IDS)

# Get Azure metrics.
# NOTE(review): presumably this adds the metric columns plotted below
# (api_call_count_in_m / io_gb) -- confirm against the helper.
EXP_DATA = get_azure_metrics(TF_PATH, EXP_DATA)

In [None]:
# --- Plot the data --- #

# Wide figure and slightly enlarged fonts, configured in a single theme call.
sns.set(font_scale=1.2, rc={'figure.figsize': (18, 6)})

# Select the dataframe column and the matching axis label from METRIC.
# The condition is evaluated once so that column and label cannot disagree
# (comparing against two different strings used to label API-call plots as
# I/O volume). Any value other than "api_calls" falls back to I/O volume.
is_api_calls = METRIC == "api_calls"
metric = "api_call_count_in_m" if is_api_calls else "io_gb"
metric_label = "API Calls (in M)" if is_api_calls else "I/O Volume (in GB)"

# Plot the selected metric per event id, one line per experiment name.
ax = sns.lineplot(x='event_id', y=metric, hue='exp_name', data=EXP_DATA)
ax.legend(loc='upper left')
ax.set_ylabel(metric_label)
ax.set_xlabel("Event ID")