# Simple PCAP file loading and structure exploration

In this example, we will simply load a sample PCAP file and explore its contents using the tools we saw in the demo. Later in the tutorial we will look at how to query this data for the attributes we may wish to utilise for a machine learning example.


In [2]:
import datetime
import gzip
import itertools
import os

import dpkt

In [6]:
# List the sample-data directory to see which files are available.
os.listdir('../data')

['pima-indians-diabetes.data.csv',
 'lyrics.txt',
 'moby_dick.txt',
 'net100.txt.gz',
 'Readme.md',
 'maccdc2012_00000.pcap.gz',
 'email']

In [7]:
# Gzip-compressed capture that the following cells open and parse.
input_zipfile = '../data/maccdc2012_00000.pcap.gz'

# Print the directory contents once more so the chosen file name can be
# checked against what is actually on disk.
print(os.listdir('../data'))

['pima-indians-diabetes.data.csv', 'lyrics.txt', 'moby_dick.txt', 'net100.txt.gz', 'Readme.md', 'maccdc2012_00000.pcap.gz', 'email']


In [8]:
# Open the gzip-compressed capture as a binary stream.
# NOTE: the handle is not closed in this cell because the dpkt reader built
# from it pulls packets in later cells; call pcap_data.close() when finished.
pcap_data = gzip.open(input_zipfile, mode='rb')

In [9]:
# Wrap the decompressed byte stream in dpkt's pcap reader; the cells below
# pull (timestamp, raw_frame_bytes) records from it.
pcap = dpkt.pcap.Reader(pcap_data)

In [10]:
# Take (up to) the first 20 (timestamp, raw_bytes) records from the capture.
# islice simply stops if the capture holds fewer than 20 packets, whereas
# calling next() inside a generator expression turns the reader's
# StopIteration into a RuntimeError (PEP 479).
top20 = list(itertools.islice(pcap, 20))

for ts, buf in top20:
    eth = dpkt.ethernet.Ethernet(buf)

    ip = eth.data
    tcp = getattr(ip, "data", None)

    # Frames without a transport header exposing ports (e.g. ARP, ICMP,
    # payloads dpkt left as raw bytes) have no sport/dport, so skip them
    # instead of raising AttributeError.
    if not (hasattr(tcp, "sport") and hasattr(tcp, "dport")):
        continue

    # utcfromtimestamp() is deprecated since Python 3.12; the timezone-aware
    # equivalent yields the same formatted string.
    thetime = datetime.datetime.fromtimestamp(float(ts), tz=datetime.timezone.utc)
    timestr = thetime.strftime("%H:%M:%S.%f")

    srcPort = tcp.sport
    destPort = tcp.dport
    print("%s: %s --> %s" % (timestr, str(srcPort), str(destPort)))

12:30:00.000000: 443 --> 46117
12:30:00.000000: 46117 --> 443
12:30:00.000000: 50463 --> 80
12:30:00.000000: 443 --> 46117
12:30:00.000000: 50465 --> 80
12:30:00.000000: 55173 --> 445
12:30:00.000000: 80 --> 50463
12:30:00.000000: 443 --> 46117
12:30:00.000000: 80 --> 50465
12:30:00.000000: 443 --> 46117
12:30:00.000000: 443 --> 46117
12:30:00.000000: 443 --> 46117
12:30:00.000000: 443 --> 46117
12:30:00.000000: 443 --> 46117
12:30:00.000000: 445 --> 55173
12:30:00.000000: 50465 --> 80
12:30:00.000000: 50465 --> 80
12:30:00.000000: 55173 --> 445
12:30:00.000000: 445 --> 55173
12:30:00.000000: 80 --> 50465


In [13]:
# Keep the (timestamp, raw_bytes) records whose transport-layer destination
# port is 80. The nested getattr defaults make frames that lack a network or
# transport layer (e.g. ARP, or payloads dpkt left as raw bytes) evaluate to
# None and be filtered out, rather than raising AttributeError.
http_traffic = [
    (ts, buf)
    for (ts, buf) in top20
    if getattr(getattr(dpkt.ethernet.Ethernet(buf).data, "data", None), "dport", None) == 80
]

In [15]:
# Number of sampled packets whose destination port is 80.
len(http_traffic)

4

In [17]:
# Show the last port-80 record as its raw (timestamp, frame_bytes) tuple.
print(http_traffic[-1])

(1331901000.0, b'\x00\x16G\x9d\xf2\xc2\x00\x0c)AK\xe7\x81\x00\x00x\x08\x00E\x00\x00\xda~%@\x00@\x06\x8a\\\xc0\xa8\xcaO\xc0\xa8\xe5\xfb\xc5!\x00P\x9e\xb2\x07\xe5\xb5\x8e\x17\x93\x80\x18\x03\x91\x15\xbc\x00\x00\x01\x01\x08\n\x00\x86y\xc7\x00\x00\x00\x00HEAD /DEASLog02.nsf HTTP/1.1\r\nConnection: close\r\nUser-Agent: Mozilla/5.0 (compatible; Nmap Scripting Engine; http://nmap.org/book/nse.html)\r\nHost: 192.168.229.251\r\n\r\n')


In [18]:
# Dissect a single record layer by layer: index 5 is the sixth sampled
# packet. The eth / ip / tcp objects are displayed in the cells that follow.
ts, buf = top20[5]
eth = dpkt.ethernet.Ethernet(buf)

# utcfromtimestamp() is deprecated since Python 3.12; build a timezone-aware
# UTC datetime instead. The formatted string is unchanged.
thetime = datetime.datetime.fromtimestamp(float(ts), tz=datetime.timezone.utc)
timestr = thetime.strftime("%H:%M:%S.%f")

# Each layer's payload is exposed as .data on the enclosing layer.
ip = eth.data
tcp = ip.data

srcPort = tcp.sport
destPort = tcp.dport

In [19]:
# Display the parsed Ethernet frame, including its nested payload layers.
eth

Ethernet(dst=b'\x00\x16G\x9d\xf2\xc2', src=b'\x00\x0c)AK\xe7', type=33024, vlan_tags=[VLANtag8021Q(pri=0, cfi=0, id=120)], vlanid=120, priority=0, cfi=0, data=IP(len=199, id=31531, off=16384, p=6, sum=36299, src=b'\xc0\xa8\xcaO', dst=b'\xc0\xa8\xe5\x99', opts=b'', data=TCP(sport=55173, dport=445, seq=1828138798, ack=427054423, off=8, flags=24, win=913, sum=25258, opts=b'\x01\x01\x08\n\x00\x86y\xc7;\x9a[U', data=b'\x00\x00\x00\x8f\xffSMBs\x00\x00\x00\x00\x18E`\x00\x00A\xd1m}Y\x1c\x86\xb7\x00\x00\x00\x00\xd4Y\x00\x00\x01\x00\r\xff\x00\x00\x00\xff\xff\x01\x00\x01\x00\x00\x00\x00\x00\x18\x00\x18\x00\x00\x00\x00\x00P\x00\x00\x00R\x00\xe1\x83\x83\x9d\xfc6\\u\xb1#Bn\xc9\x7f<\r!]\xf3\x92\x8a\x1f\x12\x7f\xe1\x83\x83\x9d\xfc6\\u\xb1#Bn\xc9\x7f<\r!]\xf3\x92\x8a\x1f\x12\x7fadministrator\x00\x00Nmap\x00Native Lanman\x00')))

In [20]:
# Display the payload carried by the Ethernet frame (eth.data).
ip

IP(len=199, id=31531, off=16384, p=6, sum=36299, src=b'\xc0\xa8\xcaO', dst=b'\xc0\xa8\xe5\x99', opts=b'', data=TCP(sport=55173, dport=445, seq=1828138798, ack=427054423, off=8, flags=24, win=913, sum=25258, opts=b'\x01\x01\x08\n\x00\x86y\xc7;\x9a[U', data=b'\x00\x00\x00\x8f\xffSMBs\x00\x00\x00\x00\x18E`\x00\x00A\xd1m}Y\x1c\x86\xb7\x00\x00\x00\x00\xd4Y\x00\x00\x01\x00\r\xff\x00\x00\x00\xff\xff\x01\x00\x01\x00\x00\x00\x00\x00\x18\x00\x18\x00\x00\x00\x00\x00P\x00\x00\x00R\x00\xe1\x83\x83\x9d\xfc6\\u\xb1#Bn\xc9\x7f<\r!]\xf3\x92\x8a\x1f\x12\x7f\xe1\x83\x83\x9d\xfc6\\u\xb1#Bn\xc9\x7f<\r!]\xf3\x92\x8a\x1f\x12\x7fadministrator\x00\x00Nmap\x00Native Lanman\x00'))

In [21]:
# Display the payload carried by the IP layer (ip.data).
tcp

TCP(sport=55173, dport=445, seq=1828138798, ack=427054423, off=8, flags=24, win=913, sum=25258, opts=b'\x01\x01\x08\n\x00\x86y\xc7;\x9a[U', data=b'\x00\x00\x00\x8f\xffSMBs\x00\x00\x00\x00\x18E`\x00\x00A\xd1m}Y\x1c\x86\xb7\x00\x00\x00\x00\xd4Y\x00\x00\x01\x00\r\xff\x00\x00\x00\xff\xff\x01\x00\x01\x00\x00\x00\x00\x00\x18\x00\x18\x00\x00\x00\x00\x00P\x00\x00\x00R\x00\xe1\x83\x83\x9d\xfc6\\u\xb1#Bn\xc9\x7f<\r!]\xf3\x92\x8a\x1f\x12\x7f\xe1\x83\x83\x9d\xfc6\\u\xb1#Bn\xc9\x7f<\r!]\xf3\x92\x8a\x1f\x12\x7fadministrator\x00\x00Nmap\x00Native Lanman\x00')