In [1]:
from pathlib import Path
import os
import pandas as pd
import plotly.express as px
from IPython.display import display

In [2]:
data_path = Path("data")
actors = data_path.glob("*")
actors = [actor for actor in actors if actor.is_dir()]
actors

[PosixPath('data/standalone'),
 PosixPath('data/server'),
 PosixPath('data/worker')]

In [34]:
def load_df(filename: str, date_columns: list[str], date_format=None, skiprows=1, header=0) -> pd.DataFrame:
    dfs = []
    for actor in actors:
        df = pd.read_csv(actor / filename, skiprows=skiprows, header=header)
        for col in date_columns:
            df[col] = df[col].str.replace(pat=r"\(.*\)", repl="", regex=True).str.replace("GMT", "").str.strip()
            df[col] = pd.to_datetime(df[col], format="%a %b %d %Y %H:%M:%S %z")
        df["actor"] = actor.name
        dfs.append(df)
    df = pd.concat(dfs)
    return df

In [35]:
disk_df = load_df("disk.csv", ["direction"])
iops_df = load_df("iops.csv", ["direction"])
net_df = load_df("net.csv", ["remote_type"])
cpu_df = load_df("VM_Instance_-_CPU_utilization_[MEAN].csv", [0], skiprows=5, header=None)
received_bytes_df = load_df("VM_Instance_-_Received_bytes_[SUM],_Sent_bytes_[SUM]_1.csv", [0], skiprows=5, header=None)
sent_bytes_df = load_df("VM_Instance_-_Received_bytes_[SUM],_Sent_bytes_[SUM]_2.csv", [0], skiprows=5, header=None)
display(disk_df)
display(iops_df)
display(net_df)
display(cpu_df)
display(received_bytes_df)
display(sent_bytes_df)

Unnamed: 0,direction,Write,Read,actor
0,2024-05-21 14:39:00+02:00,297183.066667,12970124.233333,standalone
1,2024-05-21 14:40:00+02:00,91463.333333,2532996.55,standalone
2,2024-05-21 14:41:00+02:00,41170.266667,2488.433333,standalone
3,2024-05-21 14:42:00+02:00,35164.000000,80356.05,standalone
4,2024-05-21 14:43:00+02:00,51493.900000,19377.433333,standalone
...,...,...,...,...
713,2024-05-22 00:42:00+02:00,30512.050000,2826.4,worker
714,2024-05-22 00:43:00+02:00,34470.516667,0.0,worker
715,2024-05-22 00:44:00+02:00,27173.700000,821065.1,worker
716,2024-05-22 00:45:00+02:00,40083.333333,100641.683333,worker


Unnamed: 0,direction,Write,Read,actor
0,2024-05-21 14:39:00+02:00,25.633333333333333,292.516667,standalone
1,2024-05-21 14:40:00+02:00,9.916666666666666,57.216667,standalone
2,2024-05-21 14:41:00+02:00,6.6499999999999995,0.216667,standalone
3,2024-05-21 14:42:00+02:00,5.85,1.750000,standalone
4,2024-05-21 14:43:00+02:00,8.016666666666666,0.450000,standalone
...,...,...,...,...
54,2024-05-22 00:43:00+02:00,5.533333333333333,0.000000,worker
55,2024-05-22 00:44:00+02:00,4,61.250000,worker
56,2024-05-22 00:45:00+02:00,5.866666666666666,7.516667,worker
57,2024-05-22 00:46:00+02:00,5.116666666666666,0.000000,worker


Unnamed: 0,remote_type,External (or VMs Different Project),Google Services,actor,VMs Different Region (outside europe-central2),VMs Different Region (outside europe-west4)
0,2024-05-21 13:39:00+02:00,undefined,0.000000,standalone,,
1,2024-05-21 13:40:00+02:00,0,0.000000,standalone,,
2,2024-05-21 13:41:00+02:00,0,0.000000,standalone,,
3,2024-05-21 13:42:00+02:00,0,0.000000,standalone,,
4,2024-05-21 13:43:00+02:00,0,0.000000,standalone,,
...,...,...,...,...,...,...
715,2024-05-22 00:44:00+02:00,0.0,0.150000,worker,,0.183333
716,2024-05-22 00:45:00+02:00,0.05,0.066667,worker,,0.983333
717,2024-05-22 00:46:00+02:00,0.033333,0.066667,worker,,0.000000
718,2024-05-22 00:47:00+02:00,0.0,0.050000,worker,,0.000000


Unnamed: 0,0,1,actor
0,2024-05-21 14:39:00+02:00,0.238000,standalone
1,2024-05-21 14:40:00+02:00,0.025575,standalone
2,2024-05-21 14:41:00+02:00,0.015236,standalone
3,2024-05-21 14:42:00+02:00,0.021075,standalone
4,2024-05-21 14:43:00+02:00,0.023321,standalone
...,...,...,...
712,2024-05-22 00:41:00+02:00,0.030453,worker
713,2024-05-22 00:42:00+02:00,0.029386,worker
714,2024-05-22 00:43:00+02:00,0.026338,worker
715,2024-05-22 00:44:00+02:00,0.053452,worker


Unnamed: 0,0,1,actor
0,2024-05-21 14:41:00+02:00,7.723000e+02,standalone
1,2024-05-21 14:42:00+02:00,8.387833e+02,standalone
2,2024-05-21 14:43:00+02:00,2.019817e+03,standalone
3,2024-05-21 14:44:00+02:00,1.057937e+04,standalone
4,2024-05-21 14:45:00+02:00,1.451012e+07,standalone
...,...,...,...
351,2024-05-22 00:41:00+02:00,3.764700e+03,worker
352,2024-05-22 00:42:00+02:00,1.265267e+03,worker
353,2024-05-22 00:43:00+02:00,9.089500e+02,worker
354,2024-05-22 00:44:00+02:00,1.101650e+03,worker


Unnamed: 0,0,1,actor
0,2024-05-21 14:41:00+02:00,304.150000,standalone
1,2024-05-21 14:42:00+02:00,409.700000,standalone
2,2024-05-21 14:43:00+02:00,16876.933333,standalone
3,2024-05-21 14:44:00+02:00,37654.316667,standalone
4,2024-05-21 14:45:00+02:00,25105.083333,standalone
...,...,...,...
352,2024-05-22 00:42:00+02:00,1145.200000,worker
353,2024-05-22 00:43:00+02:00,454.283333,worker
354,2024-05-22 00:44:00+02:00,625.416667,worker
355,2024-05-22 00:45:00+02:00,28339.000000,worker


In [36]:
px.line(disk_df, x="direction", y="Read", color="actor", title="Disk usage").show()
px.line(disk_df, x="direction", y="Write", color="actor", title="Disk usage").show()

In [37]:
px.line(iops_df, x="direction", y="Write", color="actor", title="Disk usage").show()
px.line(iops_df, x="direction", y="Read", color="actor", title="Disk usage").show()

In [38]:
px.line(net_df, x="remote_type", y="Google Services", color="actor", title="Net usage").show()
px.line(net_df, x="remote_type", y="External (or VMs Different Project)", color="actor", title="Net usage").show()
px.line(net_df, x="remote_type", y="VMs Different Region (outside europe-central2)", color="actor", title="Net usage").show()
px.line(net_df, x="remote_type", y="VMs Different Region (outside europe-west4)", color="actor", title="Net usage").show()
px.line(sent_bytes_df, x=0, y=1, color="actor", title="Sent bytes").show()
px.line(received_bytes_df, x=0, y=1, color="actor", title="Received bytes").show()

In [39]:
px.line(cpu_df, x=0, y=1, color="actor", title="CPU usage").show()