In [None]:
from IPy import IP
from scapy.all import *
import pandas as pd
import numpy as np
import binascii  # binary to ASCII
from time import perf_counter
from ipaddress import ip_address
import logging
import time
import pyshark
import nest_asyncio
from prettytable import PrettyTable
from collections import Counter
nest_asyncio.apply()

In [None]:
# Alternative sample capture (relative path):
# filepath = 'samples/icedid.pcap'
# NOTE(review): the path below is absolute and machine-specific; it has to be
# adjusted before the notebook can run on another machine.
filepath = "/home/martin/Programming/C2Detective/samples/https_wireshark.pcap"
# Load Scapy's 'tls' layer before the capture is read (presumably so TLS
# records are dissected instead of being left as raw payload - confirm).
load_layer('tls')

# Load packets as a list in memory

In [None]:
# Read the whole capture file referenced by `filepath`.
packets = rdpcap(filepath)  # creates a list in memory
# Alternative: creates a generator, packets are not stored in memory
# packets = PcapReader(filepath)

---

# Packets parsing with Scapy (fast)

### External source IP count

In [None]:
# Count how often each non-private source IPv4 address occurs in the capture
# and print the result as a table, most frequent address first.
src_ip_list = []

for packet in packets:
    if IP not in packet:
        continue
    src_ip = packet[IP].src
    try:
        is_external = not ip_address(src_ip).is_private
    except ValueError:
        # ip_address() raises ValueError for a value that is not a valid
        # address; skip only that case instead of hiding every error.
        continue
    if is_external:  # append only public IPs
        src_ip_list.append(src_ip)

# Counter tallies the occurrences directly from the iterable.
cnt = Counter(src_ip_list)

table = PrettyTable(["Source IP", "Count"])
for ip, count in cnt.most_common():
    table.add_row([ip, count])
print(table)

### External destination IP count

In [None]:
# Count how often each non-private destination IPv4 address occurs in the
# capture and print the result as a table, most frequent address first.
dst_ip_list = []

for packet in packets:
    if IP not in packet:
        continue
    dst_ip = packet[IP].dst
    try:
        is_external = not ip_address(dst_ip).is_private
    except ValueError:
        # ip_address() raises ValueError for a value that is not a valid
        # address; skip only that case instead of hiding every error.
        continue
    if is_external:  # append only public IPs
        dst_ip_list.append(dst_ip)

# Counter tallies the occurrences directly from the iterable.
cnt = Counter(dst_ip_list)

table = PrettyTable(["Destination IP", "Count"])
for ip, count in cnt.most_common():
    table.add_row([ip, count])
print(table)

## Anatomy of TLS packet structure

In [None]:
# Alternative: scan the capture and show the first matching packet.
# for packet in packets:
#     print(packet.summary())
    # if packet.haslayer(SSL):
    #     packet.show()
    #     break

# Packet.show() prints the layer breakdown itself and returns None, so it is
# called directly; wrapping it in print() emitted an extra "None" line.
# NOTE(review): index 1520 is specific to the capture in `filepath` and raises
# IndexError on a shorter capture - confirm it points at the intended packet.
packets[1520].show()

## Anatomy of a DNS packet structure

In [None]:
# packet[1][0] = Ether
# packet[1][1] = IP
# packet[1][2] = UDP
# packet[1][3] = DNS
# packet[1][4] = DNSQR
# packet[1][5-9] = DNSRR

# ###[ Ethernet ]### 
#   dst       = 00:0e:53:07:f5:22
#   src       = 00:0b:db:a6:5f:13
#   type      = IPv4
# ###[ IP ]### 
#      version   = 4
#      ihl       = 5
#      tos       = 0x0
#      len       = 80
#      id        = 33736
#      flags     = 
#      frag      = 0
#      ttl       = 128
#      proto     = udp
#      chksum    = 0x7447
#      src       = 10.9.23.23
#      dst       = 10.9.23.101
#      \options   \
# ###[ UDP ]### 
#         sport     = domain
#         dport     = 56868
#         len       = 60
#         chksum    = 0x380a
# ###[ DNS ]### 
#            id        = 40087
#            qr        = 1
#            opcode    = QUERY
#            aa        = 0
#            tc        = 0
#            rd        = 1
#            ra        = 1
#            z         = 0
#            ad        = 0
#            cd        = 0
#            rcode     = ok
#            qdcount   = 1
#            ancount   = 1
#            nscount   = 0
#            arcount   = 0
#            \qd        \
#             |###[ DNS Question Record ]### 
#             |  qname     = 'trallfasterinf.com.'
#             |  qtype     = A
#             |  qclass    = IN
#            \an        \
#             |###[ DNS Resource Record ]### 
#             |  rrname    = 'trallfasterinf.com.'
#             |  type      = A
#             |  rclass    = IN
#             |  ttl       = 29
#             |  rdlen     = None
#             |  rdata     = 137.184.114.20
#            ns        = None
#            ar        = None

In [None]:
# Display the first packet that contains a DNSRR layer, then stop scanning.
for packet in packets:
    if not packet.haslayer(DNSRR):
        continue
    packet.show()
    break

## Extracting data

### Enumerate and extract DNSQR and DNSRR fields

In [None]:
# SOURCE : https://cybernomad.online/2014/05/12/scapy-iterating-over-dns-responses/
#
# Example repr of a DNS response with several answer records:
# <DNS  id=0 qr=1L opcode=QUERY aa=0L tc=0L rd=1L ra=1L z=0L ad=0L cd=0L rcode=ok qdcount=1 ancount=5 nscount=0 arcount=0
# qd=<DNSQR  qname='www.stackoverflow.com.' qtype=A qclass=IN
# |> an=<DNSRR  rrname='www.stackoverflow.com.' type=CNAME rclass=IN ttl=3379 rdata='stackoverflow.com.'
# |<DNSRR  rrname='stackoverflow.com.' type=A rclass=IN ttl=79 rdata='151.101.1.69'
# |<DNSRR  rrname='stackoverflow.com.' type=A rclass=IN ttl=79 rdata='151.101.65.69'
# |<DNSRR  rrname='stackoverflow.com.' type=A rclass=IN ttl=79 rdata='151.101.129.69'
# |<DNSRR  rrname='stackoverflow.com.' type=A rclass=IN ttl=79 rdata='151.101.193.69'
# |>>>>> ns=None ar=None |>

# Print the question name of every DNS question and, for packets with a DNSRR
# layer, the rrname/rdata of each of the `ancount` answer records.
for pkt in packets:
    if pkt.haslayer(DNSQR):
        raw_qname = pkt[DNSQR].qname
        qname = raw_qname.decode()
        print(f"DNS Question Record [qname]: {qname}")

    if not pkt.haslayer(DNSRR):
        continue

    answer_total = pkt[DNS].ancount
    for idx in range(answer_total):
        record = pkt[DNSRR][idx]
        rrname = record.rrname.decode()
        rdata = record.rdata
        print(f"DNS Resource Record [rrname]: {rrname}")
        print(f"DNS Resource Record [rdata]: {rdata}")

    print("----------------------")

    # Earlier index-based variant, kept for reference:
    # a_count = pkt[DNS].ancount
    # i = a_count + 4
    # while i > 4:
    #     print(pkt[0][i].rdata, pkt[0][i].rrname)
    #     i -= 1

### Extract unique connections

In [None]:
# Collect the unique (source, destination) IPv4 pairs of the capture, plus the
# subset in which at least one endpoint is a non-private address.
connetions_all = set()
connections_public_private = set()

for pkt in packets:
    if 'IP' not in pkt:
        continue

    ipv4 = pkt['IP']  # the IPv4 header of this packet
    endpoints = (ipv4.src, ipv4.dst)
    connetions_all.add(endpoints)

    # "not a or not b" written as "not (a and b)"; evaluation order is the same
    both_private = ip_address(ipv4.src).is_private and ip_address(ipv4.dst).is_private
    if not both_private:
        connections_public_private.add(endpoints)

total_connections = len(connetions_all)
print(f">> Number of all connections: {total_connections}")
# To list every connection as well:
# for pair in connetions_all:
#     print(pair)

public_connections = len(connections_public_private)
print(f">> Number of connections containing public src/dst address: {public_connections}")
print(f">> List of connections:")
for pair in connections_public_private:
    print(pair)

### Extract domains from DNS responses

In [None]:
# Collect the unique record names (rrname) found in DNS answers.
rrnames = set()

# iterate through every packet
for packet in packets:
    # only interested in packets with a DNS resource record (DNSRR) layer
    if packet.haslayer(DNSRR):
        # if the an(swer) is a DNSRR, remember the name it replied with
        # NOTE(review): only `packet.an` itself is inspected here, not any
        # further answer records - confirm that this is intended.
        if isinstance(packet.an, DNSRR):
            rrnames.add(packet.an.rrname.decode('UTF-8'))

# report how many distinct names were collected, then list them
print(f">> Found '{len(rrnames)}' unique 'rrnames'\n")
for name in rrnames:
    print(name)

---

# Packets parsing with PyShark (slow)

In [None]:
# Open the capture at `filepath` with PyShark; the following cells iterate over `pkts`.
pkts = pyshark.FileCapture(filepath)

In [None]:
# EXTRACT SSL
# Print the x509ce_dnsname field of every 'ssl' layer that carries one.
for pkt in pkts:
    if 'SSL' not in pkt:
        continue
    for layer in pkt.layers:
        if layer.layer_name == 'ssl' and hasattr(layer, 'x509ce_dnsname'):
            print(layer.x509ce_dnsname)

In [None]:
# EXTRACT DNS QUERIES AND DNS RESPONSES
# Gather the distinct names asked for in DNS queries (flags_response == '0').
dns_queries = set()

for pkt in pkts:
    try:
        dns_layer = pkt['DNS']
        if dns_layer.flags_response != '0':
            # RESPONSE - currently ignored; a possible handling would be:
            #     print(f"QUERY : {dns_layer.qry_name}")
            #     if hasattr(dns_layer, 'a'):
            #         print(f"RESPONSE : {dns_layer.a}")
            #     if hasattr(dns_layer, 'aaaa'):
            #         print(f"RESPONSE : {dns_layer.aaaa}")
            continue
        # QUERY
        dns_queries.add(dns_layer.qry_name)
    except KeyError:
        # the packet has no DNS layer
        continue

for queried_name in dns_queries:
    print(queried_name)

In [None]:
# Print the Host header, the User-Agent and the requested URL of HTTP packets.
for pkt in pkts:
    try:
        http = pkt['HTTP']
        has_host = hasattr(http, 'host')

        if has_host:
            print(f"Host: {http.host}")

        user_agent = http.get_field('User-Agent')
        if user_agent:
            print(f"User-Agent: {user_agent}")

        # only packets with a request URI are queries
        if not hasattr(http, 'request_uri'):
            continue

        if has_host:
            print(f"[HOST] http://{http.host}{http.request_uri}")
        else:
            # no Host header: fall back to the destination IP address
            print(f"[ IP ] http://{pkt['IP'].dst}{http.request_uri}")

    except KeyError:
        # the packet lacks the HTTP (or IP) layer
        continue

---

# Transform packets to DataFrame (slow)

In [None]:
    def packets_to_df(packets):
        """Convert the IP packets of a Scapy capture into a pandas DataFrame.

        Each packet containing an IP layer becomes one row. The columns are the
        IP header fields, the packet time, the TCP header field names (filled
        from whatever layer sits directly on top of IP; fields that layer does
        not have are None) and three payload columns: 'payload' (length),
        'payload_raw' (bytes) and 'payload_hex' (hex-encoded bytes). The
        'options' fields hold the number of options, not the options.

        Note that 'flags', 'chksum' and 'options' exist in both the IP and the
        TCP field lists, so these column names occur twice in the result.

        Args:
            packets: Scapy packet list; it is filtered with ``packets[IP]``.

        Returns:
            pandas.DataFrame with one row per IP packet and a fresh 0..n-1
            index. Column dtypes are inferred from the collected values.
        """
        t_start = perf_counter()
        print(f"[{time.strftime('%H:%M:%S')}] [INFO] Transforming packet capture to DataFrame object ...")
        # save field names from IP/TCP to be used as columns in DataFrame
        ip_fields = [field.name for field in IP().fields_desc]
        tcp_fields = [field.name for field in TCP().fields_desc]

        # ['version', 'ihl', 'tos', 'len', 'id', 'flags', 'frag', 'ttl', 'proto', 'chksum', 'src', 'dst', 'options', 'time', 'sport', 'dport', 'seq', 'ack', 'dataofs', 'reserved', 'flags', 'window', 'chksum', 'urgptr', 'options', 'payload', 'payload_raw', 'payload_hex']
        dataframe_fields = ip_fields + \
                           ['time'] + tcp_fields + ['payload', 'payload_raw', 'payload_hex']

        # Collect plain rows and build the DataFrame once at the end; growing
        # a DataFrame with pd.concat per packet copies all previous rows on
        # every iteration (quadratic in the number of packets).
        rows = []

        # iterate over each packet, but load only the IP (layer 3) fields
        for packet in packets[IP]:
            ip_layer = packet[IP]
            # the layer carried directly by IP (TCP, UDP, ...); using the
            # payload object avoids re-looking the layer up by type from the
            # top of the packet
            transport = ip_layer.payload

            # list of all values contained in a single packet -> one row of DF
            field_values = []

            # add all IP fields to the row
            for field in ip_fields:
                if field == 'options':
                    # count the number of options defined in IP Header (field name: options)
                    field_values.append(len(ip_layer.fields[field]))
                else:
                    # add the value of a current field into the list
                    field_values.append(ip_layer.fields[field])

            field_values.append(packet.time)

            # iterate over TCP/UDP (layer 4) fields
            for field in tcp_fields:
                try:
                    value = transport.fields[field]
                    field_values.append(len(value) if field == 'options' else value)
                except (KeyError, TypeError, AttributeError):
                    # the layer has no such field (e.g. UDP has no 'seq') or
                    # the value has no length: keep the column, leave it empty
                    field_values.append(None)

            # append different variations of the payload field from ###[ Raw ]### segment
            payload = transport.payload
            field_values.append(len(payload))  # payload
            field_values.append(payload.original)  # payload_raw
            field_values.append(binascii.hexlify(payload.original))  # payload_hex

            rows.append(field_values)

        # a single construction also yields the clean 0..n-1 index directly
        df = pd.DataFrame(rows, columns=dataframe_fields)

        t_stop = perf_counter()
        print(f"[{time.strftime('%H:%M:%S')}] [INFO] Packet capture transformed in " + "{:.2f}s".format(t_stop - t_start))

        return df

In [None]:
# Build the DataFrame from the packets loaded with Scapy above.
df_packets = packets_to_df(packets)

In [None]:
# Summary statistics over the packet DataFrame: packet count, most frequent
# source/destination address, and the non-private addresses on each side.
print(">> Statistics")
packets_count = len(df_packets)
print(f">>> Loaded {packets_count} packets")

src_summary = df_packets['src'].describe()
top_src_address = src_summary['top']
print(f">>> Top source address: {top_src_address} ")
# print(df['src'].describe(),'\n\n')
# print(f">>> Top external source address: {None} ")

dst_summary = df_packets['dst'].describe()
top_dst_address = dst_summary['top']
print(f">>> Top destination address: {top_dst_address}")
# print(df['dst'].describe(),"\n\n")
# print(f">>> Top external destination address: {None} ")

# distinct source addresses and the ones that are not private
unique_src_addresses = df_packets['src'].unique().tolist()
external_src_addresses = [
    addr for addr in unique_src_addresses
    if not ip_address(addr).is_private
]

# distinct destination addresses and the ones that are not private
unique_dst_addresses = df_packets['dst'].unique().tolist()
external_dest_addresses = [
    addr for addr in unique_dst_addresses
    if not ip_address(addr).is_private
]

---