In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skylark.utils import logger

import seaborn as sns

import pickle
import requests
import json
import re
from pathlib import Path

from skylark import skylark_root

# All notebook inputs live under <skylark_root>/data; figures produced below are
# written to a dedicated sub-directory that is created on first run.
data_dir = skylark_root / "data"
figure_dir = data_dir / "figures" / "pareto_speedups_updated"
figure_dir.mkdir(exist_ok=True, parents=True)

# Matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8-*" and
# later releases dropped the old names, so pick whichever name this install ships.
_bright_style = "seaborn-v0_8-bright" if "seaborn-v0_8-bright" in plt.style.available else "seaborn-bright"
plt.style.use(_bright_style)
plt.set_cmap("plasma")

In [None]:
# Load the Pareto sweep results from the shared data directory instead of a
# machine-specific absolute path, so the notebook runs from any checkout.
df = pd.read_csv(data_dir / "pareto.csv")

# Keep only the text before the first "-" of each endpoint:
# gcp:us-central1-c -> "gcp:us"
# aws:us-east-1 -> "aws:us"
df["src_region"] = df["src"].str.split("-").str[0]
df["dst_region"] = df["dst"].str.split("-").str[0]

# Rescale by the instance limit: cost_factor is multiplied by it and
# throughput_speedup is divided by it.
df["cost_factor"] = df["cost_factor"] * df["instance_limit"]
df["throughput_speedup"] = df["throughput_speedup"] / df["instance_limit"]

# One figure per instance_limit, with one cost_factor-vs-throughput line per
# (src, dst) pair, labelled in the legend.
for instance_limit, df_limit in df.groupby("instance_limit"):
    fig, ax = plt.subplots(figsize=(8, 6))
    for (src, dst), df_grouped in df_limit.groupby(["src", "dst"]):
        # Sort into a new frame: sorting a groupby slice in place can raise
        # SettingWithCopyWarning.
        df_grouped = df_grouped.sort_values(by="throughput")
        label = "{} to {}".format(src, dst)
        # NOTE(review): seaborn documents `markers` as applying to levels of the
        # `style` variable, which is not set here, so no point markers are likely
        # drawn -- confirm whether marker="o" was intended.
        sns.lineplot(x="throughput", y="cost_factor", data=df_grouped, ax=ax, label=label, alpha=0.8, markers=True)
    # Place the legend below the axes so it does not cover the lines.
    ax.legend(loc="lower center", bbox_to_anchor=(0.5, -0.6), ncol=3)
    ax.set_title("instance_limit={}".format(instance_limit))
    ax.set_xlabel("Throughput (Gbps)")
    ax.set_ylabel("Cost Factor")

    # white background (no transparency)
    fig.set_facecolor("white")

    # bbox_inches="tight" keeps the below-axes legend inside the saved image.
    fig.savefig(figure_dir / f"pareto_speedups_{instance_limit}.png", bbox_inches="tight", dpi=300)
    fig.savefig(figure_dir / f"pareto_speedups_{instance_limit}.pdf", bbox_inches="tight", dpi=300)

In [None]:
# Summary statistics for df. When the cells are run in order, cost_factor and
# throughput_speedup here are the values already rescaled by instance_limit in the
# plotting cell, not the raw values from the CSV.
df.describe()