# USB Protocol Analysis

This notebook analyzes the master USB dataset (`usb_master_dataset.parquet`) created by the Rust pcap converter.
The dataset contains the complete USB communication data, one row per packet, across multiple devices and capture sessions.


In [1]:
# Import required libraries
import sys
sys.path.append('../scripts')

import polars as pl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from helpers import (
    load_master_dataset, get_session_stats, get_device_summary,
    print_session_summary, print_device_summary, analyze_control_packets,
    analyze_urb_transactions, get_payload_patterns, filter_by_device
)

# Plot appearance. The matplotlib 'default' style is applied first because it
# resets rcParams; the figure size and the seaborn "husl" palette are then
# layered on top of that clean baseline.
plt.style.use('default')
plt.rcParams.update({'figure.figsize': (12, 8)})
sns.set_palette("husl")

# Visible confirmation that the setup cell has finished running.
print("📊 USB Protocol Analysis Environment Ready!")


📊 USB Protocol Analysis Environment Ready!


In [2]:
# Read the master USB capture table (a Parquet file) through the project helper.
df = load_master_dataset('../../usb_master_dataset.parquet')

# Assemble the headline figures as ready-to-print lines, then emit them in
# order: row count, column count, distinct device addresses, distinct session
# ids, and the first/last timestamp in the table.
overview_lines = [
    "\n📈 Dataset Overview:",
    f"Total packets: {len(df):,}",
    f"Total fields: {len(df.columns)}",
    f"Devices: {sorted(df['device_address'].unique().to_list())}",
    f"Sessions: {len(df['session_id'].unique())}",
    f"Time span: {df['timestamp'].min():.1f}s to {df['timestamp'].max():.1f}s",
]
for overview_line in overview_lines:
    print(overview_line)


✅ Loaded 11,514 USB packets from ../../usb_master_dataset.parquet

📈 Dataset Overview:
Total packets: 11,514
Total fields: 41
Devices: [6, 9, 13, 16]
Sessions: 7
Time span: 0.0s to 295.6s


In [3]:
# Get detailed session statistics.
# get_session_stats (imported from helpers) is given the full packet table; its
# result is kept in `session_stats` and handed to print_session_summary for
# display. Judging by the printed report, each capture session is summarised by
# its device, packet and payload counts, duration, payload sizes, endpoint count
# and URB ID count.
session_stats = get_session_stats(df)
print_session_summary(session_stats)


📊 CAPTURE SESSION SUMMARY
📁 orig_adc_1000hz.6
   🔌 Device: 6
   📦 Packets: 1,240 total (605 with payload, 48.8%)
   ⏱️  Duration: 17.9s
   📏 Payload: avg 155.2b, max 968b
   🔗 Endpoints: 3, URB IDs: 104

📁 orig_adc_50hz.6
   🔌 Device: 6
   📦 Packets: 570 total (270 with payload, 47.4%)
   ⏱️  Duration: 16.1s
   📏 Payload: avg 21.2b, max 188b
   🔗 Endpoints: 3, URB IDs: 104

📁 orig_adc_record.6
   🔌 Device: 6
   📦 Packets: 420 total (195 with payload, 46.4%)
   ⏱️  Duration: 18.3s
   📏 Payload: avg 13.7b, max 188b
   🔗 Endpoints: 3, URB IDs: 62

📁 pd_capture_new.9
   🔌 Device: 9
   📦 Packets: 6,930 total (3462 with payload, 50.0%)
   ⏱️  Duration: 295.6s
   📏 Payload: avg 12.6b, max 130b
   🔗 Endpoints: 3, URB IDs: 442

📁 orig_with_pd.13
   🔌 Device: 13
   📦 Packets: 2,056 total (1013 with payload, 49.3%)
   ⏱️  Duration: 42.1s
   📏 Payload: avg 8.8b, max 188b
   🔗 Endpoints: 3, URB IDs: 104

📁 orig_open_close.16
   🔌 Device: 16
   📦 Packets: 152 total (61 with payload, 40.1%)
   ⏱️  Du

In [4]:
# Get device summary.
# get_device_summary (imported from helpers) is given the full packet table; its
# result is kept in `device_summary` and handed to print_device_summary for
# display. Judging by the printed report, each device is summarised by its
# packet count, number of sessions, packets carrying payload data, average
# payload size and time span.
device_summary = get_device_summary(df)
print_device_summary(device_summary)


🔌 DEVICE SUMMARY
Device 6:
  📊 2,230 packets across 3 sessions
  💾 1070 packets with payload data
  📏 Average payload: 94.3 bytes
  ⏱️  Time span: 18.3s

Device 9:
  📊 6,930 packets across 1 sessions
  💾 3462 packets with payload data
  📏 Average payload: 12.6 bytes
  ⏱️  Time span: 295.6s

Device 13:
  📊 2,056 packets across 1 sessions
  💾 1013 packets with payload data
  📏 Average payload: 8.8 bytes
  ⏱️  Time span: 42.1s

Device 16:
  📊 298 packets across 2 sessions
  💾 103 packets with payload data
  📏 Average payload: 12.8 bytes
  ⏱️  Time span: 9.8s

