In [26]:
import sys
sys.path.append('../scripts')
import polars as pl
from helpers import load_master_dataset

# Load the master dataset and keep only the packets captured in one source file.
source_file = "orig_adc_record.6"
df = load_master_dataset('../../usb_master_dataset.parquet')
df = df.filter(pl.col('source_file') == source_file)

# Rows whose transfer_type is '0x02' are the ones reported as control packets
# below; every other row is kept in bulk_df.
# NOTE(review): the filter only excludes '0x02', so any other non-bulk
# transfer type would also end up in bulk_df — confirm none are present.
bulk_df = df.filter(pl.col('transfer_type') != '0x02')

total_packets = len(df)
bulk_packets = len(bulk_df)
# Guard the percentage: a source_file that matches no rows yields an empty
# frame, which would otherwise raise ZeroDivisionError in the summary line.
bulk_share = bulk_packets / total_packets * 100 if total_packets else 0.0

print(f"Source file: {source_file}")
print(f"Dataset: {total_packets:,} packets")
print(f"Bulk transfers: {bulk_packets:,} packets ({bulk_share:.1f}%)")
print(f"Control packets filtered: {total_packets - bulk_packets}")
print(f"\nColumn names ({len(df.columns)}):")
print(df.columns)
print("\nUnique source files:")
print(df['source_file'].unique())


✅ Loaded 11,514 USB packets from ../../usb_master_dataset.parquet
Source file: orig_adc_record.6
Dataset: 420 packets
Bulk transfers: 394 packets (93.8%)
Control packets filtered: 26

Column names (40):
['session_id', 'frame_number', 'timestamp', 'timestamp_absolute', 'direction', 'device_address', 'bus_id', 'endpoint_address', 'endpoint_number', 'transfer_type', 'urb_type', 'urb_status', 'data_length', 'urb_length', 'payload_hex', 'setup_flag', 'data_flag', 'interval', 'start_frame', 'frame_length', 'frame_protocols', 'source_file', 'bmrequest_type', 'brequest', 'brequest_name', 'wvalue', 'windex', 'wlength', 'descriptor_type', 'descriptor_index', 'language_id', 'transfer_flags', 'copy_of_transfer_flags', 'urb_id', 'usb_src', 'usb_dst', 'usb_addr', 'urb_ts_sec', 'urb_ts_usec', 'added_datetime']

Unique source files:
shape: (1,)
Series: 'source_file' [str]
[
	"orig_adc_record.6"
]


In [29]:
# Summarise every URB (one row per urb_id) and list them in chronological order.
# NOTE(review): grouping assumes a urb_id identifies a single transaction; the
# multi-second durations seen for some ids suggest ids may be reused within a
# capture — confirm before interpreting `duration` as a per-transfer latency.
urb_analysis = df.group_by('urb_id').agg([
    pl.len().alias('packet_count'),
    # unique() gives no ordering guarantee, so sort each list to keep the
    # printed values stable from run to run.
    pl.col('urb_type').unique().sort().alias('urb_types'),
    pl.col('frame_number').min().alias('first_frame'),
    pl.col('frame_number').max().alias('last_frame'),
    pl.col('timestamp').min().alias('start_time'),
    pl.col('timestamp').max().alias('end_time'),
    (pl.col('timestamp').max() - pl.col('timestamp').min()).alias('duration'),
    pl.col('transfer_type').unique().sort().alias('transfer_types'),
    pl.col('direction').unique().sort().alias('directions'),
    pl.col('data_length').sum().alias('total_bytes')
    # group_by output order is arbitrary; first_frame breaks ties between URBs
    # that share a start_time so the listing is reproducible.
]).sort(['start_time', 'first_frame'])

print(f"All URB transactions in chronological order ({len(urb_analysis)} URBs):")
print("=" * 80)

for i, row in enumerate(urb_analysis.iter_rows(named=True), 1):
    urb_short = row['urb_id'][-8:]  # last 8 characters are enough to tell URBs apart here
    packets = row['packet_count']
    duration = row['duration']
    total_bytes = row['total_bytes']
    directions = ', '.join(row['directions'])

    print(f"{i:2d}. URB {urb_short}: {packets:2d} packets, {duration:8.6f}s, {total_bytes:3d}b, {directions}")
    


All URB transactions in chronological order (62 URBs):
 1. URB bad75900:  6 packets, 0.000805s, 157b, D->H
 2. URB badc0300: 12 packets, 14.405805s, 134b, D->H, H->D
 3. URB badc00c0: 10 packets, 14.824952s, 104b, D->H, H->D
 4. URB badc03c0: 10 packets, 15.450614s, 160b, D->H, H->D
 5. URB badc0780: 12 packets, 14.315705s, 172b, D->H, H->D
 6. URB badc0cc0: 10 packets, 14.305216s,  94b, H->D, D->H
 7. URB 11df3e40:  8 packets, 11.511222s, 178b, D->H
 8. URB 11df3540: 12 packets, 13.190623s, 130b, D->H, H->D
 9. URB 11df3600: 12 packets, 12.770312s, 242b, D->H
10. URB 11df3900:  4 packets, 0.084426s,  24b, D->H
11. URB 11df3b40:  4 packets, 0.078278s,  68b, H->D, D->H
12. URB 11df3180:  4 packets, 0.016261s,  84b, D->H
13. URB 11df3d80:  4 packets, 0.037965s,  72b, H->D
14. URB 78480840: 22 packets, 2.394499s, 452b, D->H, H->D
15. URB 78480c00: 24 packets, 2.178724s, 252b, H->D, D->H
16. URB 46b45840: 30 packets, 9.350188s, 292b, H->D, D->H
17. URB 46b45180: 32 packets, 9.348277s, 400b

In [28]:
# URB statistics: packet-count breakdown, timing, byte totals and the
# longest-running URBs, all derived from the per-URB `urb_analysis` frame.
print("URB TRANSACTION STATISTICS")
print("=" * 50)

total_urbs = len(urb_analysis)


def _share_of_urbs(count: int) -> float:
    """Return `count` as a percentage of all URBs (0.0 when there are none)."""
    # Guarded so an empty urb_analysis prints 0.0% instead of raising
    # ZeroDivisionError.
    return count / total_urbs * 100 if total_urbs else 0.0


# Basic counts: exactly two packets versus more than two packets per URB.
simple_pairs = urb_analysis.filter(pl.col('packet_count') == 2)
multi_packets = urb_analysis.filter(pl.col('packet_count') > 2)

print(f"Total URB transactions: {total_urbs}")
print(f"Simple pairs (2 packets): {len(simple_pairs)} ({_share_of_urbs(len(simple_pairs)):.1f}%)")
print(f"Multi-packet URBs: {len(multi_packets)} ({_share_of_urbs(len(multi_packets)):.1f}%)")

# Packet count distribution
print("\nPacket count distribution:")
packet_dist = urb_analysis['packet_count'].value_counts().sort('packet_count')
print(packet_dist)

# Timing statistics over the per-URB duration (last timestamp - first timestamp)
timing_stats = urb_analysis.select([
    pl.col('duration').mean().alias('avg_duration'),
    pl.col('duration').min().alias('min_duration'),
    pl.col('duration').max().alias('max_duration'),
    pl.col('duration').std().alias('std_duration')
])

print("\nTiming Statistics:")
print(timing_stats)

# Data transfer statistics over the per-URB sum of data_length
data_stats = urb_analysis.select([
    pl.col('total_bytes').sum().alias('total_bytes_all_urbs'),
    pl.col('total_bytes').mean().alias('avg_bytes_per_urb'),
    pl.col('total_bytes').max().alias('max_bytes_per_urb')
])

print("\nData Transfer Statistics:")
print(data_stats)

# Show longest running URBs
print("\nTop 5 longest running URBs:")
longest_urbs = urb_analysis.sort('duration', descending=True).head(5)
for row in longest_urbs.iter_rows(named=True):
    urb_short = row['urb_id'][-8:]
    packets = row['packet_count']
    duration = row['duration']
    total_bytes = row['total_bytes']
    print(f"  URB {urb_short}: {packets} packets, {duration:.3f}s, {total_bytes}b")


URB TRANSACTION STATISTICS
Total URB transactions: 62
Simple pairs (2 packets): 19 (30.6%)
Multi-packet URBs: 43 (69.4%)

Packet count distribution:
shape: (11, 2)
┌──────────────┬───────┐
│ packet_count ┆ count │
│ ---          ┆ ---   │
│ u32          ┆ u32   │
╞══════════════╪═══════╡
│ 2            ┆ 19    │
│ 4            ┆ 14    │
│ 6            ┆ 9     │
│ 8            ┆ 5     │
│ 10           ┆ 5     │
│ …            ┆ …     │
│ 14           ┆ 1     │
│ 22           ┆ 1     │
│ 24           ┆ 1     │
│ 30           ┆ 1     │
│ 32           ┆ 1     │
└──────────────┴───────┘

Timing Statistics:
shape: (1, 4)
┌──────────────┬──────────────┬──────────────┬──────────────┐
│ avg_duration ┆ min_duration ┆ max_duration ┆ std_duration │
│ ---          ┆ ---          ┆ ---          ┆ ---          │
│ f64          ┆ f64          ┆ f64          ┆ f64          │
╞══════════════╪══════════════╪══════════════╪══════════════╡
│ 4.911726     ┆ 0.000077     ┆ 15.450614    ┆ 5.228248     │
└────