# Network Simulation Data Visualization

This notebook visualizes the data collected from the realistic network simulation, including:
- **Network Snapshots**: Aggregated network-wide metrics over time
- **Interface Metrics**: Per-host interface statistics

We use **Polars** for fast data manipulation and **Plotly** for interactive visualizations.

In [1]:
import polars as pl
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime

# Set display options
pl.Config.set_tbl_rows(20)
pl.Config.set_fmt_str_lengths(100)

polars.config.Config

## 1. Load the Datasets

Load the network snapshots and interface metrics from the simulation output.

In [2]:
# Load network snapshots (aggregated network-wide metrics)
snapshots = pl.read_csv("../dumps/network_snapshots.csv")

# Load interface metrics (per-host statistics)
interfaces = pl.read_csv("../dumps/interface_metrics.csv")

print(f"Network Snapshots: {snapshots.shape[0]} rows, {snapshots.shape[1]} columns")
print(f"Interface Metrics: {interfaces.shape[0]} rows, {interfaces.shape[1]} columns")

Network Snapshots: 265 rows, 10 columns
Interface Metrics: 5677 rows, 11 columns


## 2. Explore Dataset Structure

### Network Snapshots Schema

In [3]:
# Display schema and first rows of network snapshots
print("Schema:")
print(snapshots.schema)
print("\nFirst 5 rows:")
snapshots.head(5)

Schema:
Schema({'timestamp': Float64, 'total_rx_bytes': Int64, 'total_tx_bytes': Int64, 'total_rx_packets': Int64, 'total_tx_packets': Int64, 'total_dropped': Int64, 'total_errors': Int64, 'active_switches': Int64, 'rx_throughput_mbps': Int64, 'tx_throughput_mbps': Float64})

First 5 rows:


timestamp,total_rx_bytes,total_tx_bytes,total_rx_packets,total_tx_packets,total_dropped,total_errors,active_switches,rx_throughput_mbps,tx_throughput_mbps
f64,i64,i64,i64,i64,i64,i64,i64,i64,f64
1767100000.0,0,13739248,0,196293,2,0,16,0,0.0
1767100000.0,0,13740860,0,196324,2,0,16,0,0.025792
1767100000.0,0,13742074,0,196347,2,0,16,0,0.019424
1767100000.0,0,13742214,0,196349,2,0,16,0,0.00224
1767100000.0,0,13744850,0,196399,2,0,16,0,0.042176


### Interface Metrics Schema

In [4]:
# Display schema and first rows of interface metrics
print("Schema:")
print(interfaces.schema)
print("\nFirst 5 rows:")
interfaces.head(5)

Schema:
Schema({'timestamp': Float64, 'host': String, 'interface': String, 'rx_bytes': Int64, 'rx_packets': Int64, 'rx_errors': Int64, 'rx_dropped': Int64, 'tx_bytes': Int64, 'tx_packets': Int64, 'tx_errors': Int64, 'tx_dropped': Int64})

First 5 rows:


timestamp,host,interface,rx_bytes,rx_packets,rx_errors,rx_dropped,tx_bytes,tx_packets,tx_errors,tx_dropped
f64,str,str,i64,i64,i64,i64,i64,i64,i64,i64
1767100000.0,"""h3""","""h3-eth0""",121768,1741,0,0,656,8,0,0
1767100000.0,"""h3""","""h3-eth0""",121768,1741,0,0,656,8,0,0
1767100000.0,"""h3""","""h3-eth0""",121820,1742,0,0,656,8,0,0
1767100000.0,"""h3""","""h3-eth0""",121820,1742,0,0,656,8,0,0
1767100000.0,"""h3""","""h3-eth0""",121820,1742,0,0,656,8,0,0


## 3. Summary Statistics

In [5]:
# Summary statistics for network snapshots
print("Network Snapshots - Summary Statistics:")
snapshots.describe()

Network Snapshots - Summary Statistics:


statistic,timestamp,total_rx_bytes,total_tx_bytes,total_rx_packets,total_tx_packets,total_dropped,total_errors,active_switches,rx_throughput_mbps,tx_throughput_mbps
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""count""",265.0,265.0,265.0,265.0,265.0,265.0,265.0,265.0,265.0,265.0
"""null_count""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""mean""",1767100000.0,0.0,239400000.0,0.0,386403.803774,2.0,0.0,16.0,0.0,24.864963
"""std""",52.392602,0.0,116660000.0,0.0,104154.001356,0.0,0.0,0.0,0.0,18.789014
"""min""",1767100000.0,0.0,13739248.0,0.0,196293.0,2.0,0.0,16.0,0.0,0.0
"""25%""",1767100000.0,0.0,158544621.0,0.0,309766.0,2.0,0.0,16.0,0.0,18.81656
"""50%""",1767100000.0,0.0,246713793.0,0.0,388655.0,2.0,0.0,16.0,0.0,20.789424
"""75%""",1767100000.0,0.0,334159824.0,0.0,470515.0,2.0,0.0,16.0,0.0,23.945024
"""max""",1767100000.0,0.0,425565199.0,0.0,560503.0,2.0,0.0,16.0,0.0,106.62992


In [6]:
# Summary statistics for interface metrics
print("Interface Metrics - Summary Statistics:")
interfaces.describe()

Interface Metrics - Summary Statistics:


statistic,timestamp,host,interface,rx_bytes,rx_packets,rx_errors,rx_dropped,tx_bytes,tx_packets,tx_errors,tx_dropped
str,f64,str,str,f64,f64,f64,f64,f64,f64,f64,f64
"""count""",5677.0,"""5677""","""5677""",5677.0,5677.0,5677.0,5677.0,5677.0,5677.0,5677.0,5677.0
"""null_count""",0.0,"""0""","""0""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""mean""",1767100000.0,,,2595000.0,4946.975515,0.0,0.0,2417100.0,2212.005989,0.0,0.0
"""std""",53.097506,,,7649100.0,6805.722235,0.0,0.0,6721500.0,4201.444566,0.0,0.0
"""min""",1767100000.0,"""h0""","""h0-eth0""",121628.0,1739.0,0.0,0.0,656.0,8.0,0.0,0.0
"""25%""",1767100000.0,,,190606.0,2153.0,0.0,0.0,1266.0,17.0,0.0,0.0
"""50%""",1767100000.0,,,261406.0,3274.0,0.0,0.0,7490.0,107.0,0.0,0.0
"""75%""",1767100000.0,,,1322862.0,4132.0,0.0,0.0,231402.0,2406.0,0.0,0.0
"""max""",1767100000.0,"""h9""","""h9-eth0""",55325876.0,51240.0,0.0,0.0,43319316.0,20831.0,0.0,0.0


## 4. Data Preprocessing

Convert timestamps to relative time (seconds from start) for better visualization.

In [7]:
# Convert timestamps to relative time (seconds from simulation start)
start_time = snapshots["timestamp"].min()

snapshots = snapshots.with_columns(
    (pl.col("timestamp") - start_time).alias("time_seconds")
)

interfaces = interfaces.with_columns(
    (pl.col("timestamp") - start_time).alias("time_seconds")
)

# Show simulation duration
duration = snapshots["time_seconds"].max()
print(f"Simulation duration: {duration:.1f} seconds ({duration / 60:.1f} minutes)")

Simulation duration: 184.5 seconds (3.1 minutes)


## 5. Network Throughput Over Time

Visualize the network-wide throughput (TX) during the simulation.

In [8]:
# Plot TX throughput over time
fig = px.line(
    snapshots.to_pandas(),
    x="time_seconds",
    y="tx_throughput_mbps",
    title="Network TX Throughput Over Time",
    labels={
        "time_seconds": "Time (seconds)",
        "tx_throughput_mbps": "Throughput (Mbps)",
    },
)
fig.update_layout(template="plotly_white", hovermode="x unified", height=400)
fig.show()

## 6. Cumulative Traffic Over Time

Visualize total bytes transmitted over the simulation duration.

In [9]:
# Plot cumulative bytes transmitted
fig = px.area(
    snapshots.to_pandas(),
    x="time_seconds",
    y="total_tx_bytes",
    title="Cumulative Bytes Transmitted Over Time",
    labels={"time_seconds": "Time (seconds)", "total_tx_bytes": "Total TX Bytes"},
)
fig.update_layout(template="plotly_white", height=400)
# Format y-axis to show MB
fig.update_yaxes(tickformat=".2s")
fig.show()

## 7. Packet Statistics Over Time

Visualize total packets and dropped packets during simulation.

In [10]:
# Create subplot for packets and dropped packets
fig = make_subplots(
    rows=2,
    cols=1,
    subplot_titles=("Total TX Packets Over Time", "Dropped Packets Over Time"),
    vertical_spacing=0.15,
)

# Total packets
fig.add_trace(
    go.Scatter(
        x=snapshots["time_seconds"].to_list(),
        y=snapshots["total_tx_packets"].to_list(),
        mode="lines",
        name="TX Packets",
        line=dict(color="blue"),
    ),
    row=1,
    col=1,
)

# Dropped packets
fig.add_trace(
    go.Scatter(
        x=snapshots["time_seconds"].to_list(),
        y=snapshots["total_dropped"].to_list(),
        mode="lines",
        name="Dropped",
        line=dict(color="red"),
    ),
    row=2,
    col=1,
)

fig.update_layout(height=600, template="plotly_white", showlegend=True)
fig.update_xaxes(title_text="Time (seconds)", row=2, col=1)
fig.update_yaxes(title_text="Packets", row=1, col=1)
fig.update_yaxes(title_text="Dropped Packets", row=2, col=1)
fig.show()

## 8. Per-Host Traffic Analysis

Analyze traffic patterns for each host in the network.

In [11]:
# Aggregate total traffic per host
host_traffic = (
    interfaces.group_by("host")
    .agg(
        [
            pl.col("rx_bytes").max().alias("total_rx_bytes"),
            pl.col("tx_bytes").max().alias("total_tx_bytes"),
            pl.col("rx_packets").max().alias("total_rx_packets"),
            pl.col("tx_packets").max().alias("total_tx_packets"),
        ]
    )
    .sort("host")
)

# Convert to MB for readability
host_traffic = host_traffic.with_columns(
    [
        (pl.col("total_rx_bytes") / 1_000_000).alias("rx_mb"),
        (pl.col("total_tx_bytes") / 1_000_000).alias("tx_mb"),
    ]
)

host_traffic

host,total_rx_bytes,total_tx_bytes,total_rx_packets,total_tx_packets,rx_mb,tx_mb
str,i64,i64,i64,i64,f64,f64
"""h0""",55325876,16673075,51240,20831,55.325876,16.673075
"""h1""",146975,2561,2115,33,0.146975,0.002561
"""h10""",144242,866,2093,11,0.144242,0.000866
"""h11""",296520,3016,3535,43,0.29652,0.003016
"""h12""",1302567,26421,3674,397,1.302567,0.026421
"""h13""",264636,2296,3807,32,0.264636,0.002296
"""h14""",1351366,28200,4353,417,1.351366,0.0282
"""h15""",1364079,2071119,4406,3690,1.364079,2.071119
"""h2""",354257,8219,2159,116,0.354257,0.008219
"""h3""",151388,1397624,2100,12316,0.151388,1.397624


In [12]:
# Bar chart: Total traffic per host
fig = px.bar(
    host_traffic.to_pandas(),
    x="host",
    y=["rx_mb", "tx_mb"],
    title="Total Traffic Per Host (MB)",
    labels={"value": "Traffic (MB)", "variable": "Direction"},
    barmode="group",
)
fig.update_layout(
    template="plotly_white", height=450, xaxis_title="Host", legend_title="Traffic Type"
)
fig.show()

## 9. Host Traffic Over Time (Heatmap)

Visualize traffic patterns for each host throughout the simulation.

In [13]:
# Calculate throughput per host over time
# Bucket into 5-second intervals
interfaces_with_throughput = interfaces.with_columns(
    [(pl.col("time_seconds") / 5).floor().cast(pl.Int32).alias("time_bucket")]
)

# Calculate delta bytes per bucket (throughput proxy)
host_time_traffic = (
    interfaces_with_throughput.group_by(["host", "time_bucket"])
    .agg(
        [
            pl.col("tx_bytes").max().alias("max_tx_bytes"),
            pl.col("tx_bytes").min().alias("min_tx_bytes"),
        ]
    )
    .with_columns(
        [
            ((pl.col("max_tx_bytes") - pl.col("min_tx_bytes")) / 1000).alias(
                "tx_kb_delta"
            )
        ]
    )
)

# Pivot for heatmap
pivot_df = host_time_traffic.pivot(
    index="host", on="time_bucket", values="tx_kb_delta"
).sort("host")

# Get numeric columns for heatmap
time_cols = [c for c in pivot_df.columns if c != "host"]
hosts = pivot_df["host"].to_list()
z_data = pivot_df.select(time_cols).to_numpy()

fig = go.Figure(
    data=go.Heatmap(
        z=z_data,
        x=[int(c) * 5 for c in time_cols],
        y=hosts,
        colorscale="Viridis",
        colorbar=dict(title="TX (KB)"),
    )
)

fig.update_layout(
    title="Host TX Traffic Over Time (5-second buckets)",
    xaxis_title="Time (seconds)",
    yaxis_title="Host",
    template="plotly_white",
    height=500,
)
fig.show()

## 10. Traffic Distribution

Analyze the distribution of throughput values.

In [14]:
# Histogram of throughput values
fig = px.histogram(
    snapshots.filter(pl.col("tx_throughput_mbps") > 0).to_pandas(),
    x="tx_throughput_mbps",
    nbins=50,
    title="Distribution of TX Throughput (non-zero values)",
    labels={"tx_throughput_mbps": "Throughput (Mbps)"},
)
fig.update_layout(template="plotly_white", height=400)
fig.show()

## 11. Traffic Share by Host

Pie chart showing the proportion of total traffic generated by each host.

In [15]:
# Pie chart for traffic share
fig = px.pie(
    host_traffic.filter(pl.col("tx_mb") > 0).to_pandas(),
    values="tx_mb",
    names="host",
    title="TX Traffic Share by Host",
    hole=0.3,
)
fig.update_layout(template="plotly_white", height=500)
fig.show()

## 12. Individual Host Traffic Over Time

Line plots showing traffic evolution for selected hosts.

In [16]:
# Select top 5 hosts by traffic
top_hosts = (
    host_traffic.sort("total_tx_bytes", descending=True).head(5)["host"].to_list()
)

# Filter interfaces for top hosts
top_host_data = interfaces.filter(pl.col("host").is_in(top_hosts))

# Line plot for each host
fig = px.line(
    top_host_data.to_pandas(),
    x="time_seconds",
    y="tx_bytes",
    color="host",
    title="TX Bytes Over Time - Top 5 Active Hosts",
    labels={"time_seconds": "Time (seconds)", "tx_bytes": "TX Bytes (cumulative)"},
)
fig.update_layout(template="plotly_white", height=450, hovermode="x unified")
fig.show()

## 13. Network Activity Summary

Key metrics from the simulation.

In [17]:
# Calculate summary metrics
total_tx_bytes = snapshots["total_tx_bytes"].max()
total_tx_packets = snapshots["total_tx_packets"].max()
total_dropped = snapshots["total_dropped"].max()
avg_throughput = snapshots["tx_throughput_mbps"].mean()
max_throughput = snapshots["tx_throughput_mbps"].max()
num_hosts = interfaces["host"].n_unique()

print("=" * 50)
print("SIMULATION SUMMARY")
print("=" * 50)
print(f"Duration:              {duration:.1f} seconds")
print(f"Active Hosts:          {num_hosts}")
print(f"Data Points:           {snapshots.shape[0]} snapshots")
print(f"Total TX Bytes:        {total_tx_bytes / 1_000_000:.2f} MB")
print(f"Total TX Packets:      {total_tx_packets:,}")
print(f"Total Dropped:         {total_dropped}")
print(f"Average Throughput:    {avg_throughput:.2f} Mbps")
print(f"Peak Throughput:       {max_throughput:.2f} Mbps")
print("=" * 50)

SIMULATION SUMMARY
Duration:              184.5 seconds
Active Hosts:          16
Data Points:           265 snapshots
Total TX Bytes:        425.57 MB
Total TX Packets:      560,503
Total Dropped:         2
Average Throughput:    24.86 Mbps
Peak Throughput:       106.63 Mbps


## 14. Correlation Analysis

Examine correlations between different network metrics.

In [18]:
# Calculate correlation matrix for snapshot metrics
numeric_cols = [
    "total_tx_bytes",
    "total_tx_packets",
    "total_dropped",
    "total_errors",
    "active_switches",
    "tx_throughput_mbps",
]

# Compute correlations using Polars
corr_data = []
for col1 in numeric_cols:
    row = []
    for col2 in numeric_cols:
        corr = snapshots.select(pl.corr(col1, col2)).item()
        row.append(corr if corr is not None else 0)
    corr_data.append(row)

# Create heatmap
fig = go.Figure(
    data=go.Heatmap(
        z=corr_data,
        x=numeric_cols,
        y=numeric_cols,
        colorscale="RdBu",
        zmid=0,
        text=[[f"{v:.2f}" for v in row] for row in corr_data],
        texttemplate="%{text}",
        textfont={"size": 10},
        colorbar=dict(title="Correlation"),
    )
)

fig.update_layout(
    title="Correlation Matrix - Network Snapshot Metrics",
    template="plotly_white",
    height=500,
    width=700,
)
fig.show()

## 15. Rolling Average Throughput

Smooth the throughput data using a rolling window to identify trends.

In [19]:
# Calculate rolling average (10-point window)
window_size = 10
snapshots_with_rolling = snapshots.with_columns(
    [
        pl.col("tx_throughput_mbps")
        .rolling_mean(window_size=window_size)
        .alias("rolling_avg")
    ]
)

# Plot raw vs rolling average
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=snapshots_with_rolling["time_seconds"].to_list(),
        y=snapshots_with_rolling["tx_throughput_mbps"].to_list(),
        mode="lines",
        name="Raw Throughput",
        opacity=0.4,
        line=dict(color="blue"),
    )
)

fig.add_trace(
    go.Scatter(
        x=snapshots_with_rolling["time_seconds"].to_list(),
        y=snapshots_with_rolling["rolling_avg"].to_list(),
        mode="lines",
        name=f"Rolling Avg ({window_size} points)",
        line=dict(color="red", width=2),
    )
)

fig.update_layout(
    title="TX Throughput with Rolling Average",
    xaxis_title="Time (seconds)",
    yaxis_title="Throughput (Mbps)",
    template="plotly_white",
    height=450,
    hovermode="x unified",
)
fig.show()