In [1]:
from scapy.all import * # Packet manipulation
import pandas as pd # Pandas - Create and Manipulate DataFrames
import numpy as np # Math Stuff (don't worry only used for one line :] )
import binascii # Binary to Ascii 
# import seaborn as sns
# sns.set(color_codes=True)
# %matplotlib inline

'''Use common fields in IP Packet to perform exploratory analysis on PCAP'''

'Use common fields in IP Packet to perform exploratory analysis on PCAP'

In [2]:
# rdpcap reads the capture file from disk; the result is kept in `pcap`
pcap_path = 'samples/icedid.pcap'
pcap = rdpcap(pcap_path)

# Bare expression so the notebook renders the capture's summary
pcap

<icedid.pcap: TCP:5179 UDP:374 ICMP:0 Other:254>

In [3]:
"""
0                   1                   2                   3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|          Source Port          |       Destination Port        |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                        Sequence Number                        |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                    Acknowledgment Number                      |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|  Data |           |U|A|P|R|S|F|                               |
| Offset| Reserved  |R|C|S|S|Y|I|            Window             |
|       |           |G|K|H|T|N|N|                               |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|           Checksum            |         Urgent Pointer        |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                    Options                    |    Padding    |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|                             data                              |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
02 04 05 a0 01 03 03 05 01 01 08 0a 1d 74 65 c5 00 00 00 00 04 02 00 00
"""

# Frames/Packets/Segments all consist of fields
# (the bare print() below only emits an empty line as this cell's output)
print()




## Exploring an item in packet list

In [4]:
# Layering: Ethernet frame -> Internet Protocol packet -> layer 4 segment.
# A capture starts at the layer 2 frame, but this notebook only analyses
# layer 3 (IP) and layer 4 (TCP and UDP), plus whatever payload follows layer 4.

# Take a single item from the packet list and peel it apart layer by layer;
# every .payload access leaves the previous layer's header behind.
ethernet_frame = pcap[31]
ip_packet = ethernet_frame.payload
segment = ip_packet.payload
data = segment.payload  # whatever comes after the layer 4 header

# One summary line per layer, outermost first.
# The last line is blank when the payload object is empty.
for layer in (ethernet_frame, ip_packet, segment, data):
    print(layer.summary())
print("---------- Complete depiction of a packet ----------")

# Complete field-by-field depiction of the packet.
# Knowing which fields exist makes it possible to ask the data more meaningful
# questions, e.g. the type of the layer 4 segment is declared in the layer 3 packet.
ethernet_frame.show()

Ether / IP / TCP 137.184.114.20:http > 10.9.23.101:58592 A / Raw
IP / TCP 137.184.114.20:http > 10.9.23.101:58592 A / Raw
TCP 137.184.114.20:http > 10.9.23.101:58592 A / Raw
Raw
---------- Complete depiction of a packet ----------
###[ Ethernet ]### 
  dst       = 00:0e:53:07:f5:22
  src       = 00:03:a0:e7:d7:61
  type      = IPv4
###[ IP ]### 
     version   = 4
     ihl       = 5
     tos       = 0x0
     len       = 1500
     id        = 34633
     flags     = 
     frag      = 0
     ttl       = 128
     proto     = tcp
     chksum    = 0x9098
     src       = 137.184.114.20
     dst       = 10.9.23.101
     \options   \
###[ TCP ]### 
        sport     = http
        dport     = 58592
        seq       = 1278967658
        ack       = 3213422612
        dataofs   = 5
        reserved  = 0
        flags     = A
        window    = 64240
        chksum    = 0x142e
        urgptr    = 0
        options   = []
###[ Raw ]### 
           load      = '\x03\\xd8\\xeeqZ7N\\xd6^\n@_y\\xaf\

In [5]:
# # Understanding the object types in scapy
# print(type(ethernet_frame))
# print(type(ip_packet))
# print(type(segment))

# # Packets can be filtered on layers ie) ethernet_frame[scapy.layers.l2.Ether]
# ethernet_type = type(ethernet_frame)
# ip_type = type(ip_packet)
# tcp_type = type(segment)
# print("Ethernet",pcap[ethernet_type])
# print("IP", pcap[ip_type])
# print("TCP", pcap[tcp_type])

# # Scapy provides this via import statements
# from scapy.layers.l2 import Ether
# from scapy.layers.inet import IP
# from scapy.layers.inet import TCP, UDP

# print("UDP", pcap[UDP])

## Convert PCAP to DataFrame


In [6]:
# Collect field names from IP/TCP/UDP (these become the DataFrame columns).
# udp_fields is not used for the column layout below; it is kept because
# other cells may refer to it.
ip_fields = [field.name for field in IP().fields_desc]
tcp_fields = [field.name for field in TCP().fields_desc]
udp_fields = [field.name for field in UDP().fields_desc]

# Column layout: IP header fields, capture time, TCP header fields, payload info.
# ['version', 'ihl', 'tos', 'len', 'id', 'flags', 'frag', 'ttl', 'proto', 'chksum', 'src', 'dst', 'options', 'time', 'sport', 'dport', 'seq', 'ack', 'dataofs', 'reserved', 'flags', 'window', 'chksum', 'urgptr', 'options', 'payload', 'payload_raw', 'payload_hex']
# NOTE: 'flags', 'chksum' and 'options' occur in both the IP and the TCP field
# lists, so these labels appear twice in the frame. The labels are left as they
# are so that existing cells keep working.
dataframe_fields = ip_fields + ['time'] + tcp_fields + ['payload','payload_raw','payload_hex']

# Build every row first and create the DataFrame once at the end.
# Concatenating a one-row frame per packet copies the whole frame on every
# iteration, which is quadratic in the number of packets.
rows = []
for packet in pcap[IP]:
    ip_layer = packet[IP]

    # Values for one DataFrame row, in dataframe_fields order
    field_values = []

    # IP header fields; 'options' is stored as the number of options defined
    for field in ip_fields:
        value = ip_layer.fields[field]
        field_values.append(len(value) if field == 'options' else value)

    field_values.append(packet.time)

    # Whatever layer sits directly on top of IP (TCP, UDP, ...)
    layer_type = type(ip_layer.payload)
    layer4 = packet[layer_type]

    # TCP columns: a field that this layer does not carry (e.g. 'seq' on a
    # UDP datagram) is recorded as None. This replaces a bare `except:` that
    # hid every kind of error, not just the missing-field case.
    for field in tcp_fields:
        value = layer4.fields.get(field)
        if field == 'options' and value is not None:
            value = len(value)  # number of options, mirroring the IP column
        field_values.append(value)

    # Payload after layer 4: length, original bytes, and hex-encoded bytes
    layer4_payload = layer4.payload
    field_values.append(len(layer4_payload))
    field_values.append(layer4_payload.original)
    field_values.append(binascii.hexlify(layer4_payload.original))

    rows.append(field_values)

# dtype=object keeps absent layer-4 values as None next to plain ints instead
# of letting pandas coerce such columns to float/NaN.
# The frame is built in one go, so it already has a clean 0..n-1 index and no
# reset_index()/drop("index") step is needed.
df = pd.DataFrame(rows, columns=dataframe_fields, dtype=object)

In [7]:
# Summary of the source-address column: count, unique, top and freq
df.loc[:, 'src'].describe()

count            5578
unique             21
top       10.9.23.101
freq             1887
Name: src, dtype: object

### DataFrame Basics

In [8]:
# A single row can be retrieved by position, e.g. df.iloc[0] for the first row

# (number of rows, number of columns)
print(df.shape)

# The next three expressions are evaluated but not rendered:
# a notebook cell only displays the value of its final expression.
df.head(5)        # first 5 rows
df.tail(5)        # last 5 rows
df.loc[:, 'src']  # source address of every row

# Source address, destination address, source port and destination port
# for every row (this is what the cell displays)
df.loc[:, ['src', 'dst', 'sport', 'dport']]

(5578, 28)


Unnamed: 0,src,dst,sport,dport
0,10.9.23.101,10.9.23.23,56868,53
1,10.9.23.23,10.9.23.101,53,56868
2,10.9.23.101,137.184.114.20,58592,80
3,137.184.114.20,10.9.23.101,80,58592
4,10.9.23.101,137.184.114.20,58592,80
...,...,...,...,...
5573,5.252.177.10,10.9.23.101,443,58699
5574,10.9.23.101,5.252.177.10,58699,443
5575,5.252.177.10,10.9.23.101,443,58699
5576,5.252.177.10,10.9.23.101,443,58699


## Statistics

In [9]:
# Summary (count / unique / top / freq) of the source addresses
src_summary = df['src'].describe()
print("# Top Source Address")
print(src_summary, '\n\n')

# Same summary for the destination addresses
dst_summary = df['dst'].describe()
print("# Top Destination Address")
print(dst_summary, "\n\n")

# Most frequent source address, and every row it sent
frequent_address = src_summary['top']
sent_by_top = df[df['src'] == frequent_address]

# Distinct peers, destination ports and source ports of that address
top_address_reports = (
    ("# Who is Top Address Speaking to?", 'dst'),
    ("# Who is the top address speaking to (Destination Ports)", 'dport'),
    ("# Who is the top address speaking to (Source Ports)", 'sport'),
)
for heading, column in top_address_reports:
    print(heading)
    print(sent_by_top[column].unique(), "\n\n")

# Top Source Address
count            5578
unique             21
top       10.9.23.101
freq             1887
Name: src, dtype: object 


# Top Destination Address
count            5578
unique             26
top       10.9.23.101
freq             3691
Name: dst, dtype: object 


# Who is Top Address Speaking to?
['10.9.23.23' '137.184.114.20' '64.227.116.208' '5.252.177.10'
 '10.9.23.255' '10.9.23.1' '13.107.246.57' '51.104.15.253' '40.91.73.169'
 '184.31.139.238' '20.42.73.27' '209.197.3.8' '20.189.173.13' '224.0.0.22'
 '239.255.255.250' '224.0.0.251' '224.0.0.252' '13.69.239.74'
 '51.11.168.232' '40.126.28.20' '52.184.206.73' '51.104.15.252'
 '78.128.112.139' '52.168.112.66' '104.46.162.224'] 


# Who is the top address speaking to (Destination Ports)
[53 80 443 135 49669 49693 138 137 139 445 None 1900 5353 5355 123 389
 3268] 


# Who is the top address speaking to (Source Ports)
[56868 58592 55087 58593 58594 58595 58962 58596 58597 58598 58599 138 137
 58600 58601 53912 58602 6271

In [26]:
from ipaddress import ip_address


In [33]:
print("Unique Source Addresses")
# Distinct source addresses as a plain Python list
unique_src_addresses = df['src'].unique().tolist()

# Only the addresses that are not in a private range are kept and printed
external_src_addresses = [
    address
    for address in unique_src_addresses
    if not ip_address(address).is_private
]

for external_address in external_src_addresses:
    print(external_address)


Unique Source Addresses
137.184.114.20
64.227.116.208
5.252.177.10
13.107.246.57
51.104.15.253
40.91.73.169
184.31.139.238
20.42.73.27
209.197.3.8
20.189.173.13
13.69.239.74
51.11.168.232
40.126.28.20
52.184.206.73
51.104.15.252
78.128.112.139
52.168.112.66
104.46.162.224


In [32]:
print("Unique Destination Addresses")
# Distinct destination addresses as a plain Python list
unique_dst_addresses = df['dst'].unique().tolist()

# Only the addresses that are not in a private range are kept and printed.
# NOTE: multicast destinations such as 224.0.0.22 are not "private", so they
# pass this filter as well.
external_dest_addresses = [
    address
    for address in unique_dst_addresses
    if not ip_address(address).is_private
]

for external_address in external_dest_addresses:
    print(external_address)

Unique Destination Addresses
137.184.114.20
64.227.116.208
5.252.177.10
13.107.246.57
51.104.15.253
40.91.73.169
184.31.139.238
20.42.73.27
209.197.3.8
20.189.173.13
224.0.0.22
239.255.255.250
224.0.0.251
224.0.0.252
13.69.239.74
51.11.168.232
40.126.28.20
52.184.206.73
51.104.15.252
78.128.112.139
52.168.112.66
104.46.162.224
