In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from loguru import logger
from tqdm import tqdm

import pickle
import json
import re
from pathlib import Path

data_dir = Path(globals()["_dh"][0]).parent / "data"
figure_dir = data_dir / "figures"
figure_dir.mkdir(exist_ok=True)

plt.style.use("seaborn-bright")
plt.set_cmap("plasma")

<Figure size 432x288 with 0 Axes>

In [5]:
rows = []
throughput_files = list((data_dir / "throughput" / "raw_data").glob("*.json"))
for file in tqdm(throughput_files):
    fname = file.name  # e.g. throughput_2021-11-20_14-10-46.json
    date_time = re.search(r"\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}", fname).group(0)
    datetime_parsed = pd.to_datetime(date_time, format="%Y-%m-%d_%H-%M-%S")
    throughput_data = json.load(open(file))
    for row in throughput_data:
        rows.append(
            dict(
                timestamp=datetime_parsed,
                src=row["src"],
                dst=row["dst"],
                throughput_sent=row["throughput_sent"],
                throughput_received=row["throughput_received"],
            )
        )
df = pd.DataFrame(rows)

100%|██████████| 33/33 [00:05<00:00,  6.57it/s]


In [6]:
# for each src, make a single plot showing the throughput_sent over time for all dst
(figure_dir / "throughput_sent").mkdir(exist_ok=True, parents=True)
for src in tqdm(df.src.unique()):
    df_src = df.loc[df.src == src]
    df_src.timestamp = df_src.timestamp.dt.tz_localize(None)
    # group by dst and plot the throughput_sent over time, each dst as one line
    # make ax as subplot with figsize=(10, 5)
    fig, ax = plt.subplots(figsize=(10, 5))
    for dst in df_src.dst.unique():
        df_dst = df_src.loc[df_src.dst == dst]
        df_dst.plot(x="timestamp", y="throughput_sent", ax=ax, label=dst)
    ax.set_title(f"{src}")
    ax.set_ylabel("throughput_sent (bytes/s)")

    # show legend with padding on the right, with sorted legend entries by np.mean(throughput_sent)
    handles, labels = ax.get_legend_handles_labels()
    labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0], reverse=False))
    ax.legend(handles, labels, loc="upper right", bbox_to_anchor=(1.375, 1.0))
    plt.tight_layout()
    # pad the right of the plot to make room for the legend
    plt.subplots_adjust(right=0.75)
    plt.savefig(figure_dir / "throughput_sent" / f"{src}.png")
    plt.close()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the d

In [7]:
# write the dataframe to a pickle file
df.to_csv(data_dir / "throughput" / "df_throughput_samples.csv")

In [8]:
# write aggregated data (src, dst, mean(throughput_sent), mean(throughput_received)) to a pickle file. unflatten key.
df_agg = df.groupby(["src", "dst"]).agg({"throughput_sent": np.mean, "throughput_received": np.mean})
df_agg.reset_index(inplace=True)
df_agg.to_csv(data_dir / "throughput" / "df_throughput_agg.csv")