In [1]:
!pip install pyshark
!pip install nest_asyncio





In [1]:
import pyshark
import nest_asyncio
from collections import defaultdict

# pyshark drives tshark through asyncio; Jupyter already runs an event loop,
# so nested loops must be allowed before any capture is iterated.
nest_asyncio.apply()

# Capture file to analyse (relative to the notebook's working directory).
PCAP_PATH = 'pkt.IPV4.randomprotofield.pcap'

# Load pcap file. The capture is streamed exactly once below, so pyshark is
# told not to retain packets it has already handed out.
cap = pyshark.FileCapture(PCAP_PATH, keep_packets=False)

# Running aggregates over every packet that carries an IPv4 layer.
bytecount = 0                      # sum of frame lengths, in bytes
flow_set = set()                   # distinct (src, dst) address pairs
flow_pkt_count = defaultdict(int)  # packets seen per (src, dst) pair
pkt_times = []                     # capture timestamp of each counted packet

pktcount = 0
skipped_pkts = 0                   # packets ignored because a needed field was missing
start_time = None                  # timestamp of the first counted packet
end_time = None                    # timestamp of the last counted packet

try:
    for pkt in cap:
        # Only the field reads are guarded: packets without an `ip` layer
        # (or lacking length/timestamp) raise AttributeError and are skipped.
        # Keeping the accumulation code outside the `try` means a genuine
        # programming error is no longer silently swallowed.
        try:
            src = pkt.ip.src
            dst = pkt.ip.dst
            size = int(pkt.length)
            timestamp = float(pkt.sniff_timestamp)
        except AttributeError:
            skipped_pkts += 1
            continue

        # Set start and end time
        if start_time is None:
            start_time = timestamp
        end_time = timestamp

        pktcount += 1
        bytecount += size

        flow_id = (src, dst)
        flow_set.add(flow_id)
        flow_pkt_count[flow_id] += 1

        pkt_times.append(timestamp)
finally:
    # Always release the capture (and the tshark process behind it), even if
    # parsing fails part-way through.
    cap.close()

# Feature Calculations
flows = len(flow_set)

# Compare against None explicitly: a timestamp of 0.0 is a valid value and
# must not be mistaken for "no packets seen".
dur = end_time - start_time if start_time is not None else 0

# Mean inter-arrival time between consecutive counted packets.
dt = 0
if len(pkt_times) >= 2:
    deltas = [later - earlier for earlier, later in zip(pkt_times, pkt_times[1:])]
    dt = sum(deltas) / len(deltas)

# Guard both ratios against division by zero (empty capture, or all packets
# sharing one timestamp).
pktrate = pktcount / dur if dur else 0
pktperflow = pktcount / flows if flows else 0

# No direction is inferred here, so every byte is attributed to tx.
tx_bytes = bytecount
rx_bytes = 0  # Optional: use directional logic if needed

# Final top features
features = {
    "bytecount": bytecount,
    "flows": flows,
    "dt": dt,
    "dur": dur,
    "pktrate": pktrate,
    "pktperflow": pktperflow,
    "rx_bytes": rx_bytes,
    "tx_bytes": tx_bytes
}

# Output
print("\nExtracted Top Features:")
for key, value in features.items():
    print(f"{key}: {value}")



Extracted Top Features:
bytecount: 6548916
flows: 28640
dt: 1.0170550258512514e-05
dur: 0.7633709907531738
pktrate: 98324.40701728086
pktperflow: 2.620740223463687
rx_bytes: 0
tx_bytes: 6548916
