# QoS Metrics Visualization for Network Forecasting

This notebook visualizes comprehensive Quality of Service metrics collected from the network simulation:

## Metrics Available
- **RTT (Round Trip Time)** - Network latency
- **Jitter** - Packet delay variation
- **MOS (Mean Opinion Score)** - Voice quality metric (1-5 scale)
- **R-Factor** - Voice quality rating (0-100)
- **Link Utilization** - Bandwidth usage percentage
- **Throughput** - Data transfer rates
- **Packet Loss** - Lost packet percentage
- **Congestion Index** - Network congestion level (0-1)
- **QoS Score** - Overall quality score

All metrics are available **per-host** for time series forecasting.

In [9]:
import polars as pl
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
from pathlib import Path

# Configure Polars display
pl.Config.set_tbl_rows(20)
pl.Config.set_fmt_str_lengths(80)

polars.config.Config

## 1. Load QoS Datasets

In [10]:
# Load all QoS datasets
data_dir = Path("../dumps")

# Load host-level QoS metrics
host_qos = pl.read_csv(data_dir / "host_qos_metrics.csv")

# Load network-level snapshots
network_snapshots = pl.read_csv(data_dir / "network_qos_snapshots.csv")

# Convert Unix timestamps (float seconds) to datetime
host_qos = host_qos.with_columns(
    pl.from_epoch(pl.col("timestamp"), time_unit="s").alias("timestamp")
)
network_snapshots = network_snapshots.with_columns(
    pl.from_epoch(pl.col("timestamp"), time_unit="s").alias("timestamp")
)

print(f"Host QoS metrics: {host_qos.shape[0]:,} rows, {host_qos.shape[1]} columns")
print(
    f"Network snapshots: {network_snapshots.shape[0]:,} rows, {network_snapshots.shape[1]} columns"
)

Host QoS metrics: 5,661 rows, 36 columns
Network snapshots: 354 rows, 26 columns


In [11]:
# Explore schemas
print("Host QoS Metrics Schema:")
print(host_qos.schema)
print("\n" + "=" * 60 + "\n")
print("Network Snapshots Schema:")
print(network_snapshots.schema)

Host QoS Metrics Schema:
Schema({'timestamp': Datetime(time_unit='us', time_zone=None), 'host': String, 'rx_bytes': Int64, 'tx_bytes': Int64, 'rx_packets': Int64, 'tx_packets': Int64, 'rx_errors': Int64, 'tx_errors': Int64, 'rx_dropped': Int64, 'tx_dropped': Int64, 'rtt_ms': Float64, 'rtt_min_ms': Float64, 'rtt_max_ms': Float64, 'rtt_avg_ms': Float64, 'jitter_ms': Float64, 'jitter_avg_ms': Float64, 'rx_throughput_mbps': Float64, 'tx_throughput_mbps': Float64, 'rx_throughput_kbps': Float64, 'tx_throughput_kbps': Float64, 'link_capacity_mbps': Float64, 'rx_utilization_pct': Float64, 'tx_utilization_pct': Float64, 'avg_utilization_pct': Float64, 'packet_loss_pct': Float64, 'packet_loss_rate': Float64, 'error_rate': Float64, 'r_factor': Float64, 'mos': Float64, 'congestion_index': Float64, 'queue_delay_ms': Float64, 'buffer_occupancy_pct': Float64, 'throughput_efficiency': Float64, 'goodput_ratio': Float64, 'qos_score': Float64, 'qos_category': String})


Network Snapshots Schema:
Schema({

## 2. Summary Statistics

In [12]:
# Summary statistics for key QoS metrics
key_metrics = [
    "rtt_ms",
    "jitter_ms",
    "mos",
    "r_factor",
    "avg_utilization_pct",
    "congestion_index",
    "qos_score",
]

summary = host_qos.select(key_metrics).describe()
summary

statistic,rtt_ms,jitter_ms,mos,r_factor,avg_utilization_pct,congestion_index,qos_score
str,f64,f64,f64,f64,f64,f64,f64
"""count""",5661.0,5661.0,5661.0,5661.0,5661.0,5661.0,5661.0
"""null_count""",0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""mean""",2.032076,0.012647,4.379805,91.765008,1.949529,0.007798,94.568236
"""std""",0.316893,0.360173,0.000455,0.020265,5.857087,0.023428,0.148015
"""min""",2.0,0.0,4.355738,90.698601,0.0,0.0,87.184264
"""25%""",2.0,0.0,4.379826,91.765972,0.0,0.0,94.573386
"""50%""",2.000041,4.1e-05,4.379827,91.765998,0.003995,1.6e-05,94.573469
"""75%""",2.000804,0.000111,4.379827,91.766,0.078205,0.000313,94.573486
"""max""",22.0,18.036761,4.379827,91.766,63.726296,0.254905,94.573486


In [13]:
# QoS category distribution
qos_distribution = (
    host_qos.group_by("qos_category")
    .agg(pl.len().alias("count"))
    .sort("count", descending=True)
)

fig = px.pie(
    qos_distribution.to_pandas(),
    values="count",
    names="qos_category",
    title="QoS Category Distribution",
    color="qos_category",
    color_discrete_map={
        "excellent": "#00CC96",
        "good": "#636EFA",
        "fair": "#FFA15A",
        "poor": "#EF553B",
        "bad": "#AB63FA",
    },
)
fig.show()

## 3. RTT (Round-Trip Time) Analysis

In [14]:
# RTT over time per host
fig = px.line(
    host_qos.to_pandas(),
    x="timestamp",
    y="rtt_ms",
    color="host",
    title="Round-Trip Time (RTT) Over Time per Host",
    labels={"rtt_ms": "RTT (ms)", "timestamp": "Time"},
)
fig.update_layout(
    height=500,
    xaxis_title="Time",
    yaxis_title="RTT (milliseconds)",
    legend_title="Host",
)
fig.show()

In [15]:
# RTT distribution per host (box plot)
fig = px.box(
    host_qos.to_pandas(),
    x="host",
    y="rtt_ms",
    title="RTT Distribution by Host",
    labels={"rtt_ms": "RTT (ms)", "host": "Host"},
    color="host",
)
fig.update_layout(height=500, xaxis_tickangle=-45, showlegend=False)
fig.show()

## 4. Jitter Analysis

In [16]:
# Jitter over time per host
fig = px.line(
    host_qos.to_pandas(),
    x="timestamp",
    y="jitter_ms",
    color="host",
    title="Jitter Over Time per Host",
    labels={"jitter_ms": "Jitter (ms)", "timestamp": "Time"},
)
fig.update_layout(
    height=500,
    xaxis_title="Time",
    yaxis_title="Jitter (milliseconds)",
    legend_title="Host",
)
fig.show()

In [17]:
# RTT vs Jitter correlation scatter plot
fig = px.scatter(
    host_qos.to_pandas(),
    x="rtt_ms",
    y="jitter_ms",
    color="host",
    title="RTT vs Jitter Correlation",
    labels={"rtt_ms": "RTT (ms)", "jitter_ms": "Jitter (ms)"},
    hover_data=["timestamp", "qos_category"],
)
fig.update_layout(height=500)
fig.show()

## 5. Voice Quality Metrics (MOS & R-Factor)

MOS (Mean Opinion Score) is a 1-5 scale for voice quality, and R-Factor is the ITU-T G.107 E-model score (0-100).

In [18]:
# MOS over time per host
fig = px.line(
    host_qos.to_pandas(),
    x="timestamp",
    y="mos",
    color="host",
    title="Mean Opinion Score (MOS) Over Time per Host",
    labels={"mos": "MOS (1-5)", "timestamp": "Time"},
)
fig.add_hline(y=4.0, line_dash="dash", line_color="green", annotation_text="Good (4.0)")
fig.add_hline(
    y=3.6, line_dash="dash", line_color="orange", annotation_text="Fair (3.6)"
)
fig.add_hline(y=2.6, line_dash="dash", line_color="red", annotation_text="Poor (2.6)")
fig.update_layout(
    height=500, xaxis_title="Time", yaxis_title="MOS Score", yaxis_range=[1, 5]
)
fig.show()

In [19]:
# R-Factor over time per host
fig = px.line(
    host_qos.to_pandas(),
    x="timestamp",
    y="r_factor",
    color="host",
    title="R-Factor (E-Model) Over Time per Host",
    labels={"r_factor": "R-Factor (0-100)", "timestamp": "Time"},
)
fig.add_hline(
    y=80, line_dash="dash", line_color="green", annotation_text="High Quality (80)"
)
fig.add_hline(
    y=70, line_dash="dash", line_color="orange", annotation_text="Medium Quality (70)"
)
fig.add_hline(
    y=50, line_dash="dash", line_color="red", annotation_text="Low Quality (50)"
)
fig.update_layout(
    height=500, xaxis_title="Time", yaxis_title="R-Factor", yaxis_range=[0, 100]
)
fig.show()

In [22]:
# MOS vs R-Factor relationship (should be highly correlated)
fig = px.scatter(
    host_qos.to_pandas(),
    x="r_factor",
    y="mos",
    color="qos_category",
    title="MOS vs R-Factor Relationship",
    labels={"r_factor": "R-Factor", "mos": "MOS"},
    color_discrete_map={
        "excellent": "#00CC96",
        "good": "#636EFA",
        "fair": "#FFA15A",
        "poor": "#EF553B",
        "bad": "#AB63FA",
    },
)
fig.update_layout(height=500)
fig.show()

## 6. Link Utilization & Congestion Analysis

In [23]:
# Utilization over time per host
fig = px.line(
    host_qos.to_pandas(),
    x="timestamp",
    y="avg_utilization_pct",
    color="host",
    title="Link Utilization Over Time per Host",
    labels={"avg_utilization_pct": "Utilization (%)", "timestamp": "Time"},
)
fig.add_hline(
    y=80, line_dash="dash", line_color="red", annotation_text="High Load (80%)"
)
fig.add_hline(
    y=50, line_dash="dash", line_color="orange", annotation_text="Moderate (50%)"
)
fig.update_layout(
    height=500, xaxis_title="Time", yaxis_title="Utilization (%)", yaxis_range=[0, 100]
)
fig.show()

In [26]:
# Congestion index over time per host
fig = px.line(
    host_qos.to_pandas(),
    x="timestamp",
    y="congestion_index",
    color="host",
    title="Congestion Index Over Time per Host",
    labels={"congestion_index": "Congestion Index (0-1)", "timestamp": "Time"},
)
fig.add_hline(
    y=0.7, line_dash="dash", line_color="red", annotation_text="Congested (0.7)"
)
fig.add_hline(
    y=0.3, line_dash="dash", line_color="orange", annotation_text="Moderate (0.3)"
)
fig.update_layout(
    height=500, xaxis_title="Time", yaxis_title="Congestion Index", yaxis_range=[0, 1]
)
fig.show()

In [29]:
# Utilization vs Congestion correlation
fig = px.scatter(
    host_qos.to_pandas(),
    x="avg_utilization_pct",
    y="congestion_index",
    color="host",
    title="Utilization vs Congestion Correlation",
    labels={
        "avg_utilization_pct": "Utilization (%)",
        "congestion_index": "Congestion Index",
    },
)
fig.update_layout(height=500)
fig.show()

## 7. QoS Score Analysis

In [30]:
# QoS score over time per host
fig = px.line(
    host_qos.to_pandas(),
    x="timestamp",
    y="qos_score",
    color="host",
    title="Overall QoS Score Over Time per Host",
    labels={"qos_score": "QoS Score (0-100)", "timestamp": "Time"},
)
fig.add_hline(
    y=80, line_dash="dash", line_color="green", annotation_text="Excellent (80)"
)
fig.add_hline(y=60, line_dash="dash", line_color="blue", annotation_text="Good (60)")
fig.add_hline(y=40, line_dash="dash", line_color="orange", annotation_text="Fair (40)")
fig.update_layout(
    height=500, xaxis_title="Time", yaxis_title="QoS Score", yaxis_range=[0, 100]
)
fig.show()

In [31]:
# QoS score distribution histogram
fig = px.histogram(
    host_qos.to_pandas(),
    x="qos_score",
    color="host",
    nbins=50,
    title="QoS Score Distribution by Host",
    labels={"qos_score": "QoS Score", "count": "Frequency"},
)
fig.update_layout(height=500, barmode="overlay")
fig.update_traces(opacity=0.7)
fig.show()

## 8. Throughput Analysis

In [33]:
# Use existing throughput columns (already in Mbps)
host_qos_with_mbps = host_qos.with_columns(
    (pl.col("rx_throughput_mbps") + pl.col("tx_throughput_mbps")).alias("total_mbps")
)

# Total throughput over time per host
fig = px.line(
    host_qos_with_mbps.to_pandas(),
    x="timestamp",
    y="total_mbps",
    color="host",
    title="Total Throughput Over Time per Host",
    labels={"total_mbps": "Throughput (Mbps)", "timestamp": "Time"},
)
fig.update_layout(height=500, xaxis_title="Time", yaxis_title="Throughput (Mbps)")
fig.show()

In [35]:
# RX vs TX throughput comparison
fig = go.Figure()

for host in (
    host_qos_with_mbps["host"].unique().to_list()[:5]
):  # Limit to 5 hosts for clarity
    host_data = host_qos_with_mbps.filter(pl.col("host") == host).to_pandas()
    fig.add_trace(
        go.Scatter(
            x=host_data["timestamp"],
            y=host_data["rx_throughput_mbps"],
            name=f"{host} RX",
            line=dict(dash="solid"),
        )
    )
    fig.add_trace(
        go.Scatter(
            x=host_data["timestamp"],
            y=host_data["tx_throughput_mbps"],
            name=f"{host} TX",
            line=dict(dash="dash"),
        )
    )

fig.update_layout(
    title="RX vs TX Throughput Comparison (Top 5 Hosts)",
    xaxis_title="Time",
    yaxis_title="Throughput (Mbps)",
    height=500,
)
fig.show()

## 9. Host Comparison Heatmaps

In [37]:
# Average QoS metrics per host (for heatmap)
avg_metrics = (
    host_qos.group_by("host")
    .agg(
        [
            pl.col("rtt_ms").mean().alias("avg_rtt_ms"),
            pl.col("jitter_ms").mean().alias("avg_jitter_ms"),
            pl.col("mos").mean().alias("avg_mos"),
            pl.col("r_factor").mean().alias("avg_r_factor"),
            pl.col("avg_utilization_pct").mean().alias("avg_utilization"),
            pl.col("congestion_index").mean().alias("avg_congestion"),
            pl.col("qos_score").mean().alias("avg_qos_score"),
        ]
    )
    .sort("host")
)

avg_metrics

host,avg_rtt_ms,avg_jitter_ms,avg_mos,avg_r_factor,avg_utilization,avg_congestion,avg_qos_score
str,f64,f64,f64,f64,f64,f64,f64
"""h0""",2.314299,0.176223,4.379556,91.75377,15.490845,0.061963,94.500627
"""h1""",2.00004,6.4e-05,4.379827,91.765996,0.003917,1.6e-05,94.57346
"""h10""",2.00003,5e-05,4.379827,91.765997,0.002963,1.2e-05,94.573466
"""h11""",2.000158,0.000236,4.379827,91.765987,0.015107,6e-05,94.573389
"""h12""",2.003198,0.002669,4.379823,91.765834,0.243598,0.000974,94.572387
"""h13""",2.000037,6e-05,4.379827,91.765997,0.003595,1.4e-05,94.573462
"""h14""",2.003088,0.002023,4.379824,91.765866,0.245005,0.00098,94.572652
"""h15""",2.00789,0.003178,4.379822,91.765753,0.69244,0.00277,94.572168
"""h2""",2.000536,0.000744,4.379826,91.765958,0.050964,0.000204,94.573181
"""h3""",2.003217,0.000216,4.379826,91.765951,0.311322,0.001245,94.57339


In [38]:
# Normalized heatmap of QoS metrics
metrics_cols = [
    "avg_rtt_ms",
    "avg_jitter_ms",
    "avg_mos",
    "avg_r_factor",
    "avg_utilization",
    "avg_congestion",
    "avg_qos_score",
]

# Normalize each column to 0-1 range for comparison
normalized = avg_metrics.select(
    [
        pl.col("host"),
        *[
            (
                (pl.col(c) - pl.col(c).min())
                / (pl.col(c).max() - pl.col(c).min() + 1e-10)
            ).alias(c)
            for c in metrics_cols
        ],
    ]
)

# Create heatmap
heatmap_data = normalized.select(metrics_cols).to_numpy()
hosts = normalized["host"].to_list()

fig = px.imshow(
    heatmap_data,
    x=[m.replace("avg_", "").replace("_", " ").title() for m in metrics_cols],
    y=hosts,
    title="Normalized QoS Metrics Heatmap by Host",
    color_continuous_scale="RdYlGn",
    aspect="auto",
)
fig.update_layout(height=600)
fig.show()

## 10. Network-Wide Snapshots Analysis

In [40]:
# Network-wide MOS over time
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=network_snapshots.to_pandas()["timestamp"],
        y=network_snapshots.to_pandas()["avg_mos"],
        name="Average MOS",
        line=dict(color="blue"),
    )
)

fig.add_trace(
    go.Scatter(
        x=network_snapshots.to_pandas()["timestamp"],
        y=network_snapshots.to_pandas()["min_mos"],
        name="Min MOS",
        line=dict(color="red", dash="dash"),
    )
)

fig.add_hline(y=4.0, line_dash="dot", line_color="gray", annotation_text="Good Quality")
fig.update_layout(
    title="Network-Wide MOS Over Time",
    xaxis_title="Time",
    yaxis_title="MOS Score",
    height=500,
    yaxis_range=[1, 5],
)
fig.show()

In [42]:
# Network throughput over time (already in Mbps)
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=network_snapshots.to_pandas()["timestamp"],
        y=network_snapshots.to_pandas()["total_rx_mbps"],
        name="RX",
        fill="tozeroy",
        line=dict(color="blue"),
    )
)

fig.add_trace(
    go.Scatter(
        x=network_snapshots.to_pandas()["timestamp"],
        y=network_snapshots.to_pandas()["total_tx_mbps"],
        name="TX",
        fill="tozeroy",
        line=dict(color="red"),
    )
)

fig.update_layout(
    title="Network-Wide Total Throughput",
    xaxis_title="Time",
    yaxis_title="Throughput (Mbps)",
    height=500,
)
fig.show()

## 11. Per-Host Time Series for Forecasting

Loading individual host time series files for forecasting analysis.

In [43]:
# Load per-host time series files
per_host_dir = data_dir / "per_host_timeseries"

if per_host_dir.exists():
    host_files = list(per_host_dir.glob("*_qos_timeseries.csv"))
    print(f"Found {len(host_files)} per-host time series files:")
    for f in sorted(host_files):
        size_kb = f.stat().st_size / 1024
        print(f"  - {f.name} ({size_kb:.1f} KB)")
else:
    print("Per-host time series directory not found")

Found 16 per-host time series files:
  - h0_qos_timeseries.csv (155.0 KB)
  - h10_qos_timeseries.csv (109.5 KB)
  - h11_qos_timeseries.csv (111.2 KB)
  - h12_qos_timeseries.csv (113.0 KB)
  - h13_qos_timeseries.csv (112.0 KB)
  - h14_qos_timeseries.csv (115.9 KB)
  - h15_qos_timeseries.csv (128.7 KB)
  - h1_qos_timeseries.csv (112.2 KB)
  - h2_qos_timeseries.csv (115.1 KB)
  - h3_qos_timeseries.csv (158.2 KB)
  - h4_qos_timeseries.csv (158.6 KB)
  - h5_qos_timeseries.csv (155.2 KB)
  - h6_qos_timeseries.csv (116.6 KB)
  - h7_qos_timeseries.csv (114.3 KB)
  - h8_qos_timeseries.csv (109.6 KB)
  - h9_qos_timeseries.csv (115.0 KB)


In [47]:
# Load and visualize a sample host time series
if per_host_dir.exists() and host_files:
    sample_file = host_files[0]
    sample_host = pl.read_csv(sample_file)
    sample_host = sample_host.with_columns(
        pl.from_epoch(pl.col("timestamp"), time_unit="s").alias("timestamp")
    )

    host_name = sample_file.stem.replace("_qos_timeseries", "")
    print(f"Sample host: {host_name}")
    print(f"Shape: {sample_host.shape}")
    print(
        f"Time range: {sample_host['timestamp'].min()} to {sample_host['timestamp'].max()}"
    )
    print(f"\nColumns available for forecasting:")
    print(sample_host.columns)

Sample host: h9
Shape: (354, 36)
Time range: 2025-12-30 03:28:25 to 2025-12-30 03:31:29

Columns available for forecasting:
['timestamp', 'host', 'rx_bytes', 'tx_bytes', 'rx_packets', 'tx_packets', 'rx_errors', 'tx_errors', 'rx_dropped', 'tx_dropped', 'rtt_ms', 'rtt_min_ms', 'rtt_max_ms', 'rtt_avg_ms', 'jitter_ms', 'jitter_avg_ms', 'rx_throughput_mbps', 'tx_throughput_mbps', 'rx_throughput_kbps', 'tx_throughput_kbps', 'link_capacity_mbps', 'rx_utilization_pct', 'tx_utilization_pct', 'avg_utilization_pct', 'packet_loss_pct', 'packet_loss_rate', 'error_rate', 'r_factor', 'mos', 'congestion_index', 'queue_delay_ms', 'buffer_occupancy_pct', 'throughput_efficiency', 'goodput_ratio', 'qos_score', 'qos_category']


In [48]:
# Multi-metric view for sample host
if per_host_dir.exists() and host_files:
    fig = make_subplots(
        rows=4,
        cols=1,
        subplot_titles=["QoS Score", "MOS", "RTT (ms)", "Utilization (%)"],
        shared_xaxes=True,
        vertical_spacing=0.08,
    )

    sample_df = sample_host.to_pandas()

    fig.add_trace(
        go.Scatter(
            x=sample_df["timestamp"],
            y=sample_df["qos_score"],
            name="QoS Score",
            line=dict(color="purple"),
        ),
        row=1,
        col=1,
    )
    fig.add_trace(
        go.Scatter(
            x=sample_df["timestamp"],
            y=sample_df["mos"],
            name="MOS",
            line=dict(color="blue"),
        ),
        row=2,
        col=1,
    )
    fig.add_trace(
        go.Scatter(
            x=sample_df["timestamp"],
            y=sample_df["rtt_ms"],
            name="RTT",
            line=dict(color="orange"),
        ),
        row=3,
        col=1,
    )
    fig.add_trace(
        go.Scatter(
            x=sample_df["timestamp"],
            y=sample_df["avg_utilization_pct"],
            name="Utilization",
            line=dict(color="green"),
        ),
        row=4,
        col=1,
    )

    fig.update_layout(
        title=f"Multi-Metric View for {host_name}", height=800, showlegend=False
    )
    fig.show()

## 12. Correlation Analysis for Feature Selection

In [49]:
# Correlation matrix for key QoS metrics
corr_cols = [
    "rx_bytes_rate",
    "tx_bytes_rate",
    "rx_packets_rate",
    "tx_packets_rate",
    "rtt_ms",
    "jitter_ms",
    "mos",
    "r_factor",
    "utilization_pct",
    "congestion_index",
    "qos_score",
]

# Calculate correlation matrix
corr_matrix = host_qos.select(corr_cols).to_pandas().corr()

fig = px.imshow(
    corr_matrix,
    x=corr_cols,
    y=corr_cols,
    title="QoS Metrics Correlation Matrix",
    color_continuous_scale="RdBu",
    color_continuous_midpoint=0,
    aspect="equal",
)
fig.update_layout(height=700)
fig.show()

ColumnNotFoundError: unable to find column "rx_bytes_rate"; valid columns: ["timestamp", "host", "rx_bytes", "tx_bytes", "rx_packets", "tx_packets", "rx_errors", "tx_errors", "rx_dropped", "tx_dropped", "rtt_ms", "rtt_min_ms", "rtt_max_ms", "rtt_avg_ms", "jitter_ms", "jitter_avg_ms", "rx_throughput_mbps", "tx_throughput_mbps", "rx_throughput_kbps", "tx_throughput_kbps", "link_capacity_mbps", "rx_utilization_pct", "tx_utilization_pct", "avg_utilization_pct", "packet_loss_pct", "packet_loss_rate", "error_rate", "r_factor", "mos", "congestion_index", "queue_delay_ms", "buffer_occupancy_pct", "throughput_efficiency", "goodput_ratio", "qos_score", "qos_category"]

## 13. Data Export Summary

Summary of all exported data files available for forecasting models.

In [None]:
# Summary of all data exports
print("=" * 60)
print("QoS DATA EXPORTS SUMMARY")
print("=" * 60)

print("\n📊 AGGREGATED DATA FILES:")
print("-" * 40)
for csv_file in data_dir.glob("*.csv"):
    size_mb = csv_file.stat().st_size / (1024 * 1024)
    print(f"  {csv_file.name}: {size_mb:.2f} MB")

print("\n📈 PER-HOST TIME SERIES (for forecasting):")
print("-" * 40)
if per_host_dir.exists():
    total_size = 0
    for f in sorted(per_host_dir.glob("*.csv")):
        size_kb = f.stat().st_size / 1024
        total_size += size_kb
        print(f"  {f.name}: {size_kb:.1f} KB")
    print(f"\n  Total: {total_size / 1024:.2f} MB")

print("\n🎯 AVAILABLE METRICS FOR FORECASTING:")
print("-" * 40)
forecast_metrics = [
    "qos_score - Overall quality score (0-100)",
    "mos - Voice quality score (1-5)",
    "r_factor - ITU-T E-model score (0-100)",
    "rtt_ms - Round-trip time",
    "jitter_ms - Delay variation",
    "utilization_pct - Link utilization",
    "congestion_index - Congestion level (0-1)",
    "rx_bytes_rate - Receive throughput",
    "tx_bytes_rate - Transmit throughput",
]
for m in forecast_metrics:
    print(f"  • {m}")