In [1]:
from pathlib import Path
import os
import pandas as pd
import plotly.express as px
from IPython.display import display
import plotly.io as pio

In [2]:
data_path = Path("data")
actors = data_path.glob("*")
actors = [actor for actor in actors if actor.is_dir()]
actors

[PosixPath('data/server.CIFAR10.20'),
 PosixPath('data/worker.CIFAR10.10'),
 PosixPath('data/server.CIFAR10.10'),
 PosixPath('data/worker.CIFAR10.20'),
 PosixPath('data/standalone.CIFAR10'),
 PosixPath('data/worker.CIFAR10.4'),
 PosixPath('data/worker.CIFAR10.40'),
 PosixPath('data/server.CIFAR10.40'),
 PosixPath('data/server.CIFAR10.4')]

In [3]:
img_path = Path("..", "report", "images")

In [4]:
def load_df(filename: str, date_columns: list[str], date_format=None, skiprows=1, header=0) -> pd.DataFrame:
    dfs = []
    for actor in actors:
        df = pd.read_csv(actor / filename, skiprows=skiprows, header=header)
        for col in date_columns:
            df[col] = df[col].str.replace(pat=r"\(.*\)", repl="", regex=True).str.replace("GMT", "").str.strip()
            df[col] = pd.to_datetime(df[col], format="%a %b %d %Y %H:%M:%S %z")
        df["actor"] = actor.name
        dfs.append(df)
    df = pd.concat(dfs)
    return df

In [5]:
def convert_name_to_human_friendly(name: str) -> str:
    args = name.split(".")
    if "worker" in name:
        n = int(args[-1])
        data = args[-2]
        return f"Worker ({n}, {data})"
    if "server" in name:
        n = int(args[-1])
        data = args[-2]
        return f"Server ({n}, {data})"
    if "standalone" in name:
        return f"Standalone ({args[-1]})"

In [6]:
disk_df = load_df("disk.csv", ["direction"])
iops_df = load_df("iops.csv", ["direction"])
net_df = load_df("net.csv", ["remote_type"])
cpu_df = load_df("cpu.csv", [0], skiprows=5, header=None)
received_bytes_df = load_df("recv.csv", [0], skiprows=5, header=None)
sent_bytes_df = load_df("sent.csv", [0], skiprows=5, header=None)

disk_df["actor"] = disk_df["actor"].apply(convert_name_to_human_friendly)
iops_df["actor"] = iops_df["actor"].apply(convert_name_to_human_friendly)
net_df["actor"] = net_df["actor"].apply(convert_name_to_human_friendly)
cpu_df["actor"] = cpu_df["actor"].apply(convert_name_to_human_friendly)
received_bytes_df["actor"] = received_bytes_df["actor"].apply(convert_name_to_human_friendly)
sent_bytes_df["actor"] = sent_bytes_df["actor"].apply(convert_name_to_human_friendly)

display(disk_df)
display(iops_df)
display(net_df)
display(cpu_df)
display(received_bytes_df)
display(sent_bytes_df)

Unnamed: 0,direction,Write,Read,actor
0,2024-05-22 15:41:00+02:00,32894.283333,4.688167e+02,"Server (20, CIFAR10)"
1,2024-05-22 15:42:00+02:00,32794.766667,2.863350e+03,"Server (20, CIFAR10)"
2,2024-05-22 15:43:00+02:00,40363.766667,1.630188e+05,"Server (20, CIFAR10)"
3,2024-05-22 15:44:00+02:00,62310.283333,1.422792e+06,"Server (20, CIFAR10)"
4,2024-05-22 15:45:00+02:00,274404.500000,1.226090e+06,"Server (20, CIFAR10)"
...,...,...,...,...
53,2024-05-22 20:38:00+02:00,32847.016667,0.000000e+00,"Server (4, CIFAR10)"
54,2024-05-22 20:39:00+02:00,30162.766667,0.000000e+00,"Server (4, CIFAR10)"
55,2024-05-22 20:40:00+02:00,36004.316667,0.000000e+00,"Server (4, CIFAR10)"
56,2024-05-22 20:41:00+02:00,32228.066667,0.000000e+00,"Server (4, CIFAR10)"


Unnamed: 0,direction,Write,Read,actor
0,2024-05-22 15:41:00+02:00,5.283333333333333,0.033333,"Server (20, CIFAR10)"
1,2024-05-22 15:42:00+02:00,5.233333333333333,0.066667,"Server (20, CIFAR10)"
2,2024-05-22 15:43:00+02:00,6.783333333333333,2.050000,"Server (20, CIFAR10)"
3,2024-05-22 15:44:00+02:00,5.05,20.733333,"Server (20, CIFAR10)"
4,2024-05-22 15:45:00+02:00,6.516666666666667,38.350000,"Server (20, CIFAR10)"
...,...,...,...,...
54,2024-05-22 20:39:00+02:00,4.666666666666666,0.000000,"Server (4, CIFAR10)"
55,2024-05-22 20:40:00+02:00,6.116666666666666,0.000000,"Server (4, CIFAR10)"
56,2024-05-22 20:41:00+02:00,5.316666666666666,0.000000,"Server (4, CIFAR10)"
57,2024-05-22 20:42:00+02:00,6.233333333333333,0.016667,"Server (4, CIFAR10)"


Unnamed: 0,remote_type,VMs Different Region (outside europe-central2),External (or VMs Different Project),Google Services,actor,VMs Different Region (outside europe-west4)
0,2024-05-22 15:41:00+02:00,0.000000,0.016667,0.116667,"Server (20, CIFAR10)",
1,2024-05-22 15:42:00+02:00,0.000000,0.100000,0.066667,"Server (20, CIFAR10)",
2,2024-05-22 15:43:00+02:00,0.066667,0.066667,0.100000,"Server (20, CIFAR10)",
3,2024-05-22 15:44:00+02:00,2.550000,0.016667,0.133333,"Server (20, CIFAR10)",
4,2024-05-22 15:45:00+02:00,1.583333,0.016667,0.083333,"Server (20, CIFAR10)",
...,...,...,...,...,...,...
55,2024-05-22 20:40:00+02:00,0.000000,0.050000,0.133333,"Server (4, CIFAR10)",
56,2024-05-22 20:41:00+02:00,0.000000,0.000000,0.150000,"Server (4, CIFAR10)",
57,2024-05-22 20:42:00+02:00,0.000000,0.066667,0.116667,"Server (4, CIFAR10)",
58,2024-05-22 20:43:00+02:00,0.000000,0.083333,0.083333,"Server (4, CIFAR10)",


Unnamed: 0,0,1,actor
0,2024-05-22 15:40:00+02:00,0.033077,"Server (20, CIFAR10)"
1,2024-05-22 15:41:00+02:00,0.042315,"Server (20, CIFAR10)"
2,2024-05-22 15:42:00+02:00,0.026541,"Server (20, CIFAR10)"
3,2024-05-22 15:43:00+02:00,0.187428,"Server (20, CIFAR10)"
4,2024-05-22 15:44:00+02:00,0.397372,"Server (20, CIFAR10)"
...,...,...,...
53,2024-05-22 20:37:00+02:00,0.010214,"Server (4, CIFAR10)"
54,2024-05-22 20:38:00+02:00,0.010808,"Server (4, CIFAR10)"
55,2024-05-22 20:39:00+02:00,0.009158,"Server (4, CIFAR10)"
56,2024-05-22 20:40:00+02:00,0.009402,"Server (4, CIFAR10)"


Unnamed: 0,0,1,actor
0,2024-05-22 15:43:00+02:00,1.128977e+05,"Server (20, CIFAR10)"
1,2024-05-22 15:44:00+02:00,1.379262e+06,"Server (20, CIFAR10)"
2,2024-05-22 15:45:00+02:00,4.383629e+06,"Server (20, CIFAR10)"
3,2024-05-22 15:46:00+02:00,4.005326e+06,"Server (20, CIFAR10)"
4,2024-05-22 15:47:00+02:00,4.029726e+06,"Server (20, CIFAR10)"
...,...,...,...
51,2024-05-22 20:38:00+02:00,7.672333e+02,"Server (4, CIFAR10)"
52,2024-05-22 20:39:00+02:00,7.455833e+02,"Server (4, CIFAR10)"
53,2024-05-22 20:40:00+02:00,7.501500e+02,"Server (4, CIFAR10)"
54,2024-05-22 20:41:00+02:00,7.483833e+02,"Server (4, CIFAR10)"


Unnamed: 0,0,1,actor
0,2024-05-22 15:43:00+02:00,2.236067e+05,"Server (20, CIFAR10)"
1,2024-05-22 15:44:00+02:00,2.747572e+06,"Server (20, CIFAR10)"
2,2024-05-22 15:45:00+02:00,8.736952e+06,"Server (20, CIFAR10)"
3,2024-05-22 15:46:00+02:00,8.013328e+06,"Server (20, CIFAR10)"
4,2024-05-22 15:47:00+02:00,8.024993e+06,"Server (20, CIFAR10)"
...,...,...,...
51,2024-05-22 20:38:00+02:00,3.289833e+02,"Server (4, CIFAR10)"
52,2024-05-22 20:39:00+02:00,3.184333e+02,"Server (4, CIFAR10)"
53,2024-05-22 20:40:00+02:00,8.781000e+02,"Server (4, CIFAR10)"
54,2024-05-22 20:41:00+02:00,5.622333e+02,"Server (4, CIFAR10)"


# Plots

In [7]:
pio.templates.default = "plotly_white"
WIDTH = 1200
HEIGHT = 600

In [8]:
labels = {
    "direction": "Direction",
    "Read": "Read (MB/s)",
    "Write": "Write (MB/s)",
    "actor": "Actor"
}

f = px.line(disk_df, x="direction", y="Read", color="actor", title="Disk throughput (Read)", width=WIDTH, height=HEIGHT, labels=labels)
f.write_image(img_path / "read_disk.png")
f.show()

f = px.line(disk_df, x="direction", y="Write", color="actor", title="Disk throughtput (Write)", width=WIDTH, height=HEIGHT, labels=labels)
f.write_image(img_path / "write_disk.png")
f.show()

In [9]:
labels = {
    "direction": "Time",
    "value": "IOPS",
    "actor": "Actor",
    "remote_type": "Time"
}
f = px.line(iops_df, x="direction", y="Write", color="actor", title="Disk IOPS (Write)", width=WIDTH, height=HEIGHT, labels=labels)
f.write_image(img_path / "iops_write.png")
f.show()

f = px.line(iops_df, x="direction", y="Read", color="actor", title="Disk IOPS (Read)", width=WIDTH, height=HEIGHT, labels=labels)
f.write_image(img_path / "iops_read.png")
f.show()

In [10]:
labels = {
    "remote_type": "Time"
}

f = px.line(net_df, x="remote_type", y="Google Services", color="actor", title="New Connections with Google Services", width=WIDTH, height=HEIGHT, labels=labels)
f.write_image(img_path / "network_usage_google_services.png")
f.show()

f = px.line(net_df, x="remote_type", y="External (or VMs Different Project)", color="actor", title="New Connections with external", width=WIDTH, height=HEIGHT, labels=labels)
f.write_image(img_path / "network_usage_external.png")
f.show()

f = px.line(net_df, x="remote_type", y="VMs Different Region (outside europe-central2)", color="actor", title="New Connections with VMs (outside europe-central2)", width=WIDTH, height=HEIGHT, labels=labels)
f.write_image(img_path / "network_usage_vms_different_region_europe_central2.png")
f.show()

f = px.line(net_df, x="remote_type", y="VMs Different Region (outside europe-west4)", color="actor", title="New Connections with VMs (outside europe-west4)", width=WIDTH, height=HEIGHT, labels=labels)
f.write_image(img_path / "network_usage_vms_different_region_europe_west4.png")
f.show()

f = px.line(sent_bytes_df, x=0, y=1, color="actor", title="Sent bytes", width=WIDTH, height=HEIGHT, labels=labels)
f.write_image(img_path / "sent_bytes.png")
f.show()

f = px.line(received_bytes_df, x=0, y=1, color="actor", title="Received bytes", width=WIDTH, height=HEIGHT, labels=labels)
f.write_image(img_path / "received_bytes.png")
f.show()


In [11]:
labels = {
    "0": "Time",
    "1": "CPU usage"
}
f = px.line(cpu_df, x=0, y=1, color="actor", title="CPU usage", width=WIDTH, height=HEIGHT, labels=labels)
f.write_image(img_path / "cpu_usage.png")
f.show()