In [1]:
# Install the pinned scapy release that the import cell asserts on.
# %pip (unlike !pip) always installs into the environment of the running kernel.
%pip install -q scapy==2.4.4

Collecting scapy==2.4.4
  Downloading scapy-2.4.4.tar.gz (1.0 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/1.0 MB[0m [31m3.5 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.5/1.0 MB[0m [31m6.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scapy
  Building wheel for scapy (setup.py) ... [?25l[?25hdone
  Created wheel for scapy: filename=scapy-2.4.4-py2.py3-none-any.whl size=1189175 sha256=0455973757c5a55666c0e577e4f22d2bbe567b28545dd5e5db099c29e333a874
  Stored in directory: /root/.cache/pip/wheels/53/a6/eb/6f97ac2f21e282974d530a83a76edc9dd97f74e1c94628aa5e
Succes

In [12]:
import scapy
assert scapy.__version__ == '2.4.4', 'scapy version mismatch.'
from scapy.utils import PcapReader
from scapy.all import Raw
from scapy.layers.inet import IP, UDP, TCP
from datetime import datetime
import pandas as pd
from pathlib import Path

load dataset
- columns: abstime, monotime, wirelen, caplen, payload, y, y_desc, ProtocolType
- Scapy가 반환하는 pkt.time 객체는 일반적인 float type이 아니라 EDecimal 타입이므로 각각 float 형변환 필요
- pkt 객체에는 .caplen과 .wirelen 속성이 항상 존재하지 않을 수 있으니 len()으로 대체
- Scapy는 AVTP를 기본적으로 지원하지 않으므로 pkt.haslayer("AVTP") 나 AVTP in pkt 같은 코드는 실패함.
  - AVTP(IEEE 1722)는 기본적으로 EtherType 0x22F0을 사용하는 L2(Ethernet) 프로토콜이며, UDP로 캡슐화되는 경우에는 포트 번호 17220을 사용함. 따라서 UDP 포트 번호 17220 또는 EtherType 0x22F0을 기준으로 AVTP를 추정

In [24]:
# Identifiers used to recognise AVTP (IEEE 1722) traffic.
_AVTP_UDP_PORT = 17220
_AVTP_ETHERTYPE = 0x22F0
# Tag protocol identifiers (802.1Q / 802.1ad) that precede the real EtherType
# in VLAN-tagged frames.
_VLAN_TPIDS = frozenset({0x8100, 0x88A8})
# Column order of the returned frame; also used when no packet was read.
_DATASET_COLUMNS = [
  "abstime", "monotime", "wirelen", "caplen",
  "payload", "y", "y_desc", "ProtocolType",
]


def _effective_ethertype(frame):
  """Return the EtherType of an Ethernet frame, skipping VLAN tags (None if too short)."""
  offset = 12  # the type field follows the two 6-byte MAC addresses
  while len(frame) >= offset + 2:
    ether_type = int.from_bytes(frame[offset:offset + 2], byteorder="big")
    if ether_type not in _VLAN_TPIDS:
      return ether_type
    offset += 4  # one VLAN tag = 2-byte TPID + 2-byte TCI
  return None


def _classify_protocol(pkt, frame):
  """Label one packet as "AVTP", "UDP", "TCP", "IP" or "Other"."""
  if UDP in pkt:
    udp_layer = pkt[UDP]
    # Scapy has no AVTP dissector, so UDP-encapsulated AVTP is guessed from the port.
    if _AVTP_UDP_PORT in (udp_layer.sport, udp_layer.dport):
      return "AVTP"
    return "UDP"
  if TCP in pkt:
    return "TCP"
  if IP in pkt:
    return "IP"
  # Layer-2 AVTP: decide from the raw bytes, looking past any VLAN tag.
  if _effective_ethertype(frame) == _AVTP_ETHERTYPE:
    return "AVTP"
  return "Other"


def load_dateset(path_pcap: "str | Path", max_packets=None) -> pd.DataFrame:
  """Read a pcap file into a DataFrame with one row per packet.

  The (misspelled) function name is kept so existing callers keep working.

  Args:
    path_pcap: Path of the capture file; converted with ``str()`` for PcapReader.
    max_packets: Maximum number of packets to read. ``None`` (default) reads
      the whole file; ``0`` yields an empty frame.

  Returns:
    DataFrame with the columns
    ``abstime``      - packet timestamp as a ``datetime`` (local timezone),
    ``monotime``     - seconds since the first packet, as a string with 6 decimals,
    ``wirelen``      - the packet's ``wirelen`` attribute when the reader set it,
                       otherwise the captured length,
    ``caplen``       - number of captured bytes (``len(pkt.original)``),
    ``payload``      - the captured frame bytes as a list of ints when the packet
                       has a Raw layer, else an empty list,
    ``y`` / ``y_desc`` - constant label ``0`` / ``"Normal"``,
    ``ProtocolType`` - "AVTP", "UDP", "TCP", "IP" or "Other".

  Packets that raise while being processed are reported with ``print`` and skipped.
  """
  records = []
  t0 = None  # timestamp of the first packet, reference for monotime

  with PcapReader(str(path_pcap)) as pcap_reader:
    for i, pkt in enumerate(pcap_reader):
      if max_packets is not None and i >= max_packets:
        break
      try:
        ts = pkt.time
        abstime = datetime.fromtimestamp(float(ts))
        if t0 is None:
          # Keep the reference in the timestamp's own type so the subtraction
          # below is not degraded by a float round-trip.
          t0 = ts

        frame = bytes(getattr(pkt, "original", b"") or b"")
        caplen = len(frame)
        wirelen = getattr(pkt, "wirelen", None)

        records.append({
          "abstime": abstime,
          "monotime": "{:.6f}".format(ts - t0),  # microsecond resolution
          "wirelen": caplen if wirelen is None else int(wirelen),
          "caplen": caplen,
          # NOTE(review): this stores the whole frame (headers included), and only
          # for packets carrying a Raw layer - confirm this is the intended feature.
          "payload": list(frame) if Raw in pkt else [],
          "y": 0,  # normal
          "y_desc": "Normal",
          "ProtocolType": _classify_protocol(pkt, frame),
        })
      except Exception as e:
        # Best effort: one malformed packet must not abort the whole load.
        print(f"failed to load packet dump: {e}")
        continue

  return pd.DataFrame(records, columns=_DATASET_COLUMNS)

In [25]:
# Load the training capture from the mounted Google Drive folder.
df_train = load_dateset(
  "/content/drive/MyDrive/Colab Notebooks/"
  "Automotive_Ethernet_with_Attack_original_10_17_19_50_training.pcap"
)
# The test capture is loaded the same way; currently disabled:
# df_test = load_dateset("/content/drive/MyDrive/Colab Notebooks/Automotive_Ethernet_with_Attack_original_10_17_20_04_test.pcap")

In [26]:
# Show the loaded training frame as the cell's output.
df_train

Unnamed: 0,abstime,monotime,wirelen,caplen,payload,y,y_desc,ProtocolType
0,2020-09-12 09:51:04.715221,0.000000,434,434,"[145, 239, 0, 0, 254, 0, 0, 252, 112, 0, 0, 3,...",0,Normal,Other
1,2020-09-12 09:51:04.715245,0.000024,434,434,"[145, 239, 0, 0, 254, 0, 0, 252, 112, 0, 0, 3,...",0,Normal,Other
2,2020-09-12 09:51:04.715326,0.000105,434,434,"[145, 239, 0, 0, 254, 0, 0, 252, 112, 0, 0, 3,...",0,Normal,Other
3,2020-09-12 09:51:04.715450,0.000229,434,434,"[145, 239, 0, 0, 254, 0, 0, 252, 112, 0, 0, 3,...",0,Normal,Other
4,2020-09-12 09:51:04.715559,0.000338,434,434,"[145, 239, 0, 0, 254, 0, 0, 252, 112, 0, 0, 3,...",0,Normal,Other
...,...,...,...,...,...,...,...,...
1203732,2020-09-12 10:00:16.911784,552.196563,60,60,"[220, 166, 50, 94, 72, 71, 220, 166, 50, 93, 2...",0,Normal,UDP
1203733,2020-09-12 10:00:16.912231,552.197010,60,60,"[220, 166, 50, 94, 72, 71, 220, 166, 50, 93, 2...",0,Normal,UDP
1203734,2020-09-12 10:00:16.912686,552.197465,60,60,"[220, 166, 50, 94, 72, 71, 220, 166, 50, 93, 2...",0,Normal,UDP
1203735,2020-09-12 10:00:16.913172,552.197951,60,60,"[220, 166, 50, 94, 72, 71, 220, 166, 50, 93, 2...",0,Normal,UDP
