In [None]:
import duckdb
import matplotlib.pyplot as plt
import os
import pandas as pd
import seaborn as sns
import sys

# Make the shared helper modules that live in ../utils importable from this
# notebook; the path is only appended once, so re-running the cell is harmless.
directory_path = os.path.abspath('../utils/')
if directory_path not in sys.path:
    sys.path.append(directory_path)
from functions import *
from constant import *

# Experiment parameters; these need to be set before running this notebook.
# EXPERIMENT_ID and EXPERIMENT_START_TIME are parallel lists: entry i of one
# is paired with entry i of the other when the experiment data is retrieved,
# so both must have the same length.
EXPERIMENT_ID = [""]
EXPERIMENT_START_TIME = [""]
# Event ids used to filter the retrieved experiment data.
EVENT_IDS = [""]

# Path to the storage folder where the experiment table(s) were stored.
STORAGE_PATH = ""
# Storage type that metrics should be extracted from; currently supported: 'AZURE'.
STORAGE_TYPE = ""
# Determines which metric to display (it is used as the y-axis column of the plot);
# currently supported: 'io_gb' and 'api_calls'.
METRIC = ""

In [None]:
# --- Check input validity and create DB connection --- #

# Every experiment id needs exactly one matching start time, because the two
# lists are paired up by position when the experiment data is retrieved.
# Report both lengths so a misconfiguration is easy to spot.
assert len(EXPERIMENT_ID) == len(EXPERIMENT_START_TIME), (
    "EXPERIMENT_ID and EXPERIMENT_START_TIME must have the same length, got "
    f"{len(EXPERIMENT_ID)} and {len(EXPERIMENT_START_TIME)}"
)

# Connect to database. The connection is opened read-only.
# NOTE(review): DUCKDB_PATH is not defined in this notebook; presumably it is
# provided by the star import from `constant` - confirm.
con = duckdb.connect(database=DUCKDB_PATH, read_only=True)

In [None]:
# --- Data manipulations --- #

# Retrieve relevant data from duckdb: one dataframe per
# (experiment id, start time) pair. The two lists are parallel; their lengths
# are checked in the input-validity cell. Using a comprehension keeps the loop
# variables out of the notebook namespace (the previous loop variable `id`
# shadowed the builtin `id()` for all later cells).
experiment_frames = [
    retrieve_experiment_df(con, experiment_id, start_time)
    for experiment_id, start_time in zip(EXPERIMENT_ID, EXPERIMENT_START_TIME)
]

# Concatenate once instead of growing the frame inside the loop, which copies
# all previously collected rows on every iteration. pd.concat raises on an
# empty list, so fall back to an empty dataframe when no experiment is given.
EXP_DATA = pd.concat(experiment_frames) if experiment_frames else pd.DataFrame()

# Remove superfluous entries from dataframe.
EXP_DATA = filterByEventIds(EXP_DATA, EVENT_IDS)

In [None]:
# --- Storage extraction --- #
# Build the storage-metrics object for the configured storage type from the
# filtered experiment data and the storage path.
storage_metrics = get_storage_metrics(STORAGE_TYPE, EXP_DATA, STORAGE_PATH)
# NOTE(review): presumably this loads the metrics that get_df() exposes to the
# plotting cell below - confirm against the helper's implementation.
storage_metrics.fetch_metrics()

In [None]:
# --- Plot the data --- #
# Configure the figure size and the font scale in a single theme call.
sns.set(font_scale=1.2, rc={'figure.figsize': (18, 6)})

# Draw the selected metric against the event id, with one hue per distinct
# 'exp_name' value, and keep the returned Axes so that the legend and labels
# are set on it explicitly.
metric_axes = sns.lineplot(
    data=storage_metrics.get_df(),
    x='event_id',
    y=METRIC,
    hue='exp_name',
)
metric_axes.legend(loc='upper left')
metric_axes.set_ylabel(storage_metrics.get_label(METRIC))
metric_axes.set_xlabel("Event ID")