In [1]:
# Extract directional packet sequence
#
# Reads one capture file, prints a short preview of the directed
# length/control sequence, writes the per-packet information to a CSV next to
# the capture, and prints summary statistics.

import os
from pathlib import Path
# NOTE(review): star import kept as-is because other cells may rely on the names
# it brings in; this cell itself only uses
# extract_directed_length_sequence_with_control, save_sequence_to_csv and
# analyze_sequence_statistics.
from packet_length_extractor import *

# pcap_file = "../dataset/scada/Modbus_polling_only_6RTU(106).pcap"
pcap_file = "../dataset/swat/Dec2019_00000_20191206100500_00000w_filtered(192.168.1.10-192.168.1.200)_test_kept_app.pcap"

if Path(pcap_file).exists():
    print("Extracting directed length sequence...")

    # Extract all information (the third returned sequence is not used in this cell)
    packets_info, dir_len_con_sequence, dir_len_sequence = extract_directed_length_sequence_with_control(pcap_file, protocol='enip')

    # Preview only the first ten entries so the cell output stays short.
    for dir_len_con in dir_len_con_sequence[:10]:
        print(dir_len_con)

    # Save to CSV.
    # Swap only the final extension: str.replace('.pcap', ...) would also rewrite
    # '.pcap' inside directory names, turn '.pcapng' into '.csvng', and leave the
    # path untouched (so the CSV would be written to the capture's own path) when
    # the name contains no '.pcap' at all.
    output_file = os.path.splitext(pcap_file)[0] + '_length_control_sequence.csv'
    save_sequence_to_csv(packets_info, output_file)

    # Analyze statistics
    stats = analyze_sequence_statistics(packets_info)
    print("\nStatistics:")
    for key, value in stats.items():
        print(f"{key}: {value}")

else:
    print(f"File not found: {pcap_file}")

Extracting directed length sequence...
(Decimal('1575597904.409803'), 'C-108-0')
(Decimal('1575597904.415631'), 'S-233-0')
(Decimal('1575597904.527581'), 'C-108-0')
(Decimal('1575597904.532814'), 'S-233-0')
(Decimal('1575597904.910430'), 'C-108-0')
(Decimal('1575597904.913992'), 'S-233-0')
(Decimal('1575597905.011635'), 'C-108-0')
(Decimal('1575597905.015856'), 'S-233-0')
(Decimal('1575597905.413709'), 'C-108-0')
(Decimal('1575597905.418287'), 'S-233-0')
Sequence information saved to: ../dataset/swat/Dec2019_00000_20191206100500_00000w_filtered(192.168.1.10-192.168.1.200)_test_kept_app_length_control_sequence.csv

Statistics:
total_packets: 286
avg_length: 170.5
std_length: 62.5
min_length: 108
max_length: 233
C_count: 143
S_count: 143


In [3]:
# Split sequence by pattern
#
# Reads a previously saved directed-length sequence from CSV, splits it on a
# fixed direction-length pattern, and prints how often the pattern matched
# together with the interval statistics returned by split_sequence_by_pattern.

from pathlib import Path
from packet_length_extractor import *

# NOTE(review): this is not the "..._test_kept_app_length_control_sequence.csv"
# file written by the extraction cell (there is no "_app" in this name), so it
# must already exist on disk from some other run — confirm this is intended.
csv_file = "../dataset/swat/Dec2019_00000_20191206100500_00000w_filtered(192.168.1.10-192.168.1.200)_test_kept_length_control_sequence.csv"

if Path(csv_file).exists():
    print("Extracting directed length sequence...")

    # pattern = ['C-108', 'S-60', 'S-233', 'C-60', 'C-108', 'S-60', 'S-233', 'C-60']
    pattern = ['C-108', 'S-60', 'S-233', 'C-108', 'S-60', 'S-233', 'C-60']

    # Only the second value returned by the reader (the "dir_len" sequence) is needed.
    _, dir_len_sequence = read_sequence_from_csv(csv_file, "dir_len")
    split_sequence, matched_indices, match_ratio, avg_interval, std_interval = split_sequence_by_pattern(dir_len_sequence, pattern)

    print(f"Totally matched {len(matched_indices)} times")
    print(f"Match ratio: {match_ratio:.3f}")
    print(f"Average interval: {avg_interval:.3f}")
    print(f"Standard deviation: {std_interval:.3f}")
else:
    # Report a missing input explicitly instead of finishing silently,
    # mirroring the extraction cell's behavior.
    print(f"File not found: {csv_file}")
    

Extracting directed length sequence...
Successfully read 502 packets from ../dataset/swat/Dec2019_00000_20191206100500_00000w_filtered(192.168.1.10-192.168.1.200)_test_kept_length_control_sequence.csv
Totally matched 56 times
Match ratio: 0.781
Average interval: 0.730
Standard deviation: 0.315
