In [32]:
import numpy as np
np.random.seed(42)
np.set_printoptions(suppress=True)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
tqdm.pandas()

import glob, time, datetime, os, re
from scapy.all import sniff, IP, TCP, ARP

In [2]:
NOTEBOOK_PATH = "D:/Delta Stuff/Scifair20/"

# Create dict with packet IDs mapped to attack labels

In [3]:
f_names = glob.glob(NOTEBOOK_PATH + "IDS2017/TrafficLabelling/*.csv")
dfs = []
for i in tqdm(range(len(f_names))):
    f = f_names[i]
    df_i = pd.read_csv(open(f, encoding='utf-8', errors='ignore'))
    df_i['File'] = i
    dfs.append(df_i)
df = pd.concat(dfs, ignore_index=True)

  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)
100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:18<00:00,  2.26s/it]


In [4]:
df.columns = [x.strip() for x in df.columns]

df2 = df[['Source IP', 'Destination IP', 'Source Port', 'Destination Port', 'Protocol', 'Timestamp', 'Label']]

df2 = df2.dropna()
df2 = df2.drop_duplicates()

In [5]:
def reformatDates(date):
    l = list(map(lambda x: x.lstrip("0"), re.split("/| |:", date)))
    return f"{l[0]}/{l[1]}/{l[2]} {l[3]}:{l[4]}"

In [6]:
df2['Timestamp'] = df2['Timestamp'].progress_apply(reformatDates)

100%|████████████████████████████████████████████████████████████████████| 2541697/2541697 [00:07<00:00, 329898.69it/s]


In [7]:
def groupAttacks(attack):
    attack = str(attack)
    group = attack
    if attack in ("DDoS", "DoS Hulk", "DoS GoldenEye", "DoS slowloris", "DoS Slowhttptest", "Heartbleed"):
        group = "DoS/DDos"
    elif attack.startswith("Web Attack"):
        group = "Web Attack"
    elif attack == "FTP-Patator":
        group = "Brute Force"
    elif attack == "SSH-Patator":
        group = "Brute Force"
    elif attack == "Bot":
        group = "Botnet"
    return group

In [8]:
df2['Label'] = df2['Label'].progress_apply(groupAttacks)

100%|███████████████████████████████████████████████████████████████████| 2541697/2541697 [00:02<00:00, 1139229.98it/s]


In [9]:
ipdict = {}

def add2ipdict(row):
    rowid = f"{row['Source IP']}-{row['Destination IP']}-{str(int(row['Source Port']))}-{str(int(row['Destination Port']))}-{str(row['Timestamp'])}"
    if rowid in ipdict:
        if ipdict[rowid] != row['Label']:
            print(f"Mismatch with {rowid}: {ipdict[rowid]} and {row['Label']}")
    else:
        ipdict[rowid] = row['Label']
        

In [10]:
df2.progress_apply(add2ipdict, axis=1)

100%|█████████████████████████████████████████████████████████████████████| 2541697/2541697 [01:38<00:00, 25903.83it/s]


0          None
1          None
2          None
3          None
4          None
           ... 
3119340    None
3119341    None
3119342    None
3119343    None
3119344    None
Length: 2541697, dtype: object

# Read in packets

In [28]:
pcap_files = glob.glob(NOTEBOOK_PATH + "IDS2017/PCAPs/*.pcap")
pcap_files

['D:/Delta Stuff/Scifair20/IDS2017/PCAPs\\Monday-WorkingHours.pcap',
 'D:/Delta Stuff/Scifair20/IDS2017/PCAPs\\Tuesday-WorkingHours.pcap']

In [47]:
pkt_df = pd.DataFrame(columns=["Bytes", "Label"])

In [40]:
def formatDate(date):
    fdate = datetime.datetime.fromtimestamp(date).strftime("%d/%m/%Y %H:%M")
    l = list(map(lambda x: x.lstrip("0"), re.split("/| |:", fdate)))
    return f"{l[0]}/{l[1]}/{l[2]} {str(int(l[3])+7)}:{l[4]}"

In [48]:
def processPacket(p):
    # Get source and destination IP addresses
    if IP in p:
        sip = p[IP].src
        dip = p[IP].dst
    elif ARP in p:
        sip = p[ARP].psrc
        dip = p[ARP].pdst
    else:
        return
        
    # Get source and destination ports
    if TCP in p:
        sp = p[TCP].sport
        dp = p[TCP].dport
    else:
        return
    
    # Get label
    try:
        pid = f"{sip}-{dip}-{sp}-{dp}-{formatDate(p.time)}"
        print(pid)
        label = ipdict[pid]
        pkt_df.loc[len(pkt_df)] = [str(p), label]
        print("Finished mapping packet to label " + label)
    except KeyError:
        print("Reversing...")
        pid = f"{dip}-{sip}-{dp}-{sp}-{formatDate(p.time)}"
        label = ipdict[pid]
        pkt_df.loc[len(pkt_df)] = [str(p), label]
        print("Finished mapping packet to label " + label)

In [50]:
for pcap in pcap_files[1:]:
    sniff(count=1000, offline=pcap, store=0, prn=processPacket)

192.168.10.5-192.168.10.3-49159-445-4/7/2017 8:53
Finished mapping packet to label BENIGN
192.168.10.5-192.168.10.3-49159-445-4/7/2017 8:53
Finished mapping packet to label BENIGN
192.168.10.3-192.168.10.5-445-49159-4/7/2017 8:53
Reversing...
Finished mapping packet to label BENIGN
192.168.10.3-192.168.10.5-445-49159-4/7/2017 8:53
Reversing...
Finished mapping packet to label BENIGN
192.168.10.5-192.168.10.3-49182-88-4/7/2017 8:54
Finished mapping packet to label BENIGN
192.168.10.5-192.168.10.3-49182-88-4/7/2017 8:54
Finished mapping packet to label BENIGN
192.168.10.3-192.168.10.5-88-49182-4/7/2017 8:54
Reversing...
Finished mapping packet to label BENIGN
192.168.10.3-192.168.10.5-88-49182-4/7/2017 8:54
Reversing...
Finished mapping packet to label BENIGN
192.168.10.5-192.168.10.3-49182-88-4/7/2017 8:54
Finished mapping packet to label BENIGN
192.168.10.5-192.168.10.3-49182-88-4/7/2017 8:54
Finished mapping packet to label BENIGN
192.168.10.5-192.168.10.3-49182-88-4/7/2017 8:54
Finis

KeyError: '192.168.10.3-192.168.10.5-445-49159-4/7/2017 8:54'

In [26]:
sorted(dict(df2[(df2['Source IP'] == '192.168.10.5') & (df2['Destination IP'] == '192.168.10.3')]['Source Port'].value_counts()))

[123.0,
 49152.0,
 49153.0,
 49154.0,
 49155.0,
 49156.0,
 49157.0,
 49158.0,
 49159.0,
 49160.0,
 49161.0,
 49162.0,
 49163.0,
 49164.0,
 49165.0,
 49166.0,
 49167.0,
 49168.0,
 49169.0,
 49170.0,
 49171.0,
 49172.0,
 49173.0,
 49174.0,
 49175.0,
 49176.0,
 49177.0,
 49178.0,
 49179.0,
 49180.0,
 49181.0,
 49182.0,
 49183.0,
 49184.0,
 49185.0,
 49186.0,
 49187.0,
 49188.0,
 49189.0,
 49190.0,
 49191.0,
 49192.0,
 49193.0,
 49194.0,
 49195.0,
 49196.0,
 49197.0,
 49198.0,
 49199.0,
 49200.0,
 49201.0,
 49202.0,
 49203.0,
 49204.0,
 49205.0,
 49206.0,
 49207.0,
 49208.0,
 49209.0,
 49210.0,
 49211.0,
 49212.0,
 49213.0,
 49214.0,
 49215.0,
 49216.0,
 49217.0,
 49218.0,
 49219.0,
 49220.0,
 49221.0,
 49222.0,
 49223.0,
 49224.0,
 49225.0,
 49226.0,
 49227.0,
 49228.0,
 49229.0,
 49230.0,
 49231.0,
 49232.0,
 49233.0,
 49234.0,
 49235.0,
 49236.0,
 49237.0,
 49238.0,
 49239.0,
 49240.0,
 49241.0,
 49242.0,
 49243.0,
 49244.0,
 49245.0,
 49246.0,
 49247.0,
 49248.0,
 49249.0,
 49250.0,
 4

In [157]:
len(ipdict)

2516345

In [30]:
pkts = sniff(count=100, offline=pcap_files[1])

In [43]:
pkts[53][IP].dst

'192.168.10.5'

In [44]:
pkts[53].display()

###[ Ethernet ]### 
  dst       = b8:ac:6f:36:0a:8b
  src       = 18:66:da:9b:e3:7d
  type      = IPv4
###[ IP ]### 
     version   = 4
     ihl       = 5
     tos       = 0x0
     len       = 52
     id        = 6977
     flags     = DF
     frag      = 0
     ttl       = 128
     proto     = tcp
     chksum    = 0x4a2a
     src       = 192.168.10.3
     dst       = 192.168.10.5
     \options   \
###[ TCP ]### 
        sport     = microsoft_ds
        dport     = 49159
        seq       = 2158492462
        ack       = 728086466
        dataofs   = 8
        reserved  = 0
        flags     = A
        window    = 2049
        chksum    = 0xf551
        urgptr    = 0
        options   = [('NOP', None), ('NOP', None), ('SAck', (728086465, 728086466))]



In [33]:
for i in range(len(pkts)):
    if (IP in pkts[i] or ARP in pkts[i]) and TCP in pkts[i]:
        print(i)

51
52
53
54
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


In [177]:
pkts[0].summary()

'Ether / IP / TCP 8.254.250.126:http > 192.168.10.5:49188 FA / Padding'

In [176]:
str(pkts[0])

"b'\\xb8\\xaco6\\n\\x8b\\x00\\xc1\\xb1\\x14\\xeb1\\x08\\x00E\\x00\\x00(\\x1b4\\x00\\x007\\x06\\x9ar\\x08\\xfe\\xfa~\\xc0\\xa8\\n\\x05\\x00P\\xc0$\\xdf\\xd7\\xad\\x8b\\x94\\xb88\\x9bP\\x11\\x01I\\xc54\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'"

In [139]:
for p in pkts:
    print (IP in p, TCP in p)

True True
True True
True True
True True
True True
True True
True True
True True
False False
False False


In [141]:
pkts[8].display()

###[ 802.3 ]### 
  dst       = 01:00:0c:cc:cc:cc
  src       = 70:6e:6d:1d:bb:04
  len       = 443
###[ LLC ]### 
     dsap      = 0xaa
     ssap      = 0xaa
     ctrl      = 3
###[ SNAP ]### 
        OUI       = 0xc
        code      = 0x2000
###[ Raw ]### 
           load      = '\x02\xb4\x81G\x00\x01\x00\x08SWCL\x00\x05\x01\tCisco IOS Software, Catalyst L3 Switch Software (CAT3K_CAA-UNIVERSALK9-M), Version 15.2(2)E6, RELEASE SOFTWARE (fc1)\nTechnical Support: http://www.cisco.com/techsupport\nCopyright (c) 1986-2016 by Cisco Systems, Inc.\nCompiled Sat 17-Dec-16 00:22 by prod_rel_team\x00\x06\x00\x16cisco WS-C3850-48U\x00\x02\x00\x11\x00\x00\x00\x01\x01\x01\xcc\x00\x04\xc0\xa8\n\x02\x00\x03\x00\x18GigabitEthernet1/0/4\x00\x04\x00\x08\x00\x00\x00(\x00\t\x00\x04\x00\n\x00\x06\x00\n\x00\x0b\x00\x05\x01\x00\x12\x00\x05\x00\x00\x13\x00\x05\x00\x00\x16\x00\x11\x00\x00\x00\x01\x01\x01\xcc\x00\x04\xc0\xa8\n\x02\x00\x1a\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00\xff\xff\xff\xff\x00\x1f\x00\x05\

In [143]:
import ipaddress, itertools

In [144]:
def convertIP(ipv6):
    myIP = ipaddress.ip_address(ipv6)
    hextets = myIP.exploded.split(":")
    
    # from itertools recipes
    def grouper(iterable, n, fillvalue=None):
        args = [iter(iterable)] * n
        return itertools.zip_longest(*args, fillvalue=fillvalue)

    new_groups = [int(a+b, base=16) for (a, b) in grouper(hextets, 2)]
    
    return '.'.join(map(str, new_groups))

In [146]:
convertIP(pkts[0].src)

ValueError: '00:c1:b1:14:eb:31' does not appear to be an IPv4 or IPv6 address

In [138]:
pkts[0].display()

###[ Ethernet ]### 
  dst       = b8:ac:6f:36:0a:8b
  src       = 00:c1:b1:14:eb:31
  type      = IPv4
###[ IP ]### 
     version   = 4
     ihl       = 5
     tos       = 0x0
     len       = 40
     id        = 6964
     flags     = 
     frag      = 0
     ttl       = 55
     proto     = tcp
     chksum    = 0x9a72
     src       = 8.254.250.126
     dst       = 192.168.10.5
     \options   \
###[ TCP ]### 
        sport     = http
        dport     = 49188
        seq       = 3755453835
        ack       = 2495101083
        dataofs   = 5
        reserved  = 0
        flags     = FA
        window    = 329
        chksum    = 0xc534
        urgptr    = 0
        options   = []
###[ Padding ]### 
           load      = '\x00\x00\x00\x00\x00\x00'



In [135]:
p.display()

###[ 802.3 ]### 
  dst       = 01:00:0c:cc:cc:cc
  src       = 70:6e:6d:1d:bb:06
  len       = 443
###[ LLC ]### 
     dsap      = 0xaa
     ssap      = 0xaa
     ctrl      = 3
###[ SNAP ]### 
        OUI       = 0xc
        code      = 0x2000
###[ Raw ]### 
           load      = '\x02\xb4\x81E\x00\x01\x00\x08SWCL\x00\x05\x01\tCisco IOS Software, Catalyst L3 Switch Software (CAT3K_CAA-UNIVERSALK9-M), Version 15.2(2)E6, RELEASE SOFTWARE (fc1)\nTechnical Support: http://www.cisco.com/techsupport\nCopyright (c) 1986-2016 by Cisco Systems, Inc.\nCompiled Sat 17-Dec-16 00:22 by prod_rel_team\x00\x06\x00\x16cisco WS-C3850-48U\x00\x02\x00\x11\x00\x00\x00\x01\x01\x01\xcc\x00\x04\xc0\xa8\n\x02\x00\x03\x00\x18GigabitEthernet1/0/6\x00\x04\x00\x08\x00\x00\x00(\x00\t\x00\x04\x00\n\x00\x06\x00\n\x00\x0b\x00\x05\x01\x00\x12\x00\x05\x00\x00\x13\x00\x05\x00\x00\x16\x00\x11\x00\x00\x00\x01\x01\x01\xcc\x00\x04\xc0\xa8\n\x02\x00\x1a\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00\xff\xff\xff\xff\x00\x1f\x00\x05\

In [127]:
p = pkts[6]

p.dst

p.display()

###[ Ethernet ]### 
  dst       = 33:33:00:01:00:02
  src       = 00:1e:4f:d4:ca:28
  type      = IPv6
###[ IPv6 ]### 
     version   = 6
     tc        = 0
     fl        = 0
     plen      = 111
     nh        = UDP
     hlim      = 1
     src       = fe80::f470:cd16:3cf6:bd7b
     dst       = ff02::1:2
###[ UDP ]### 
        sport     = dhcpv6_client
        dport     = dhcpv6_server
        len       = 111
        chksum    = 0x6cf2
###[ DHCPv6 Solicit Message ]### 
           msgtype   = SOLICIT
           trid      = 0xf44ed9
###[ DHCP6 Elapsed Time Option ]### 
              optcode   = ELAPSED_TIME
              optlen    = 2
              elapsedtime= 0.00 sec
###[ DHCP6 Client Identifier Option ]### 
                 optcode   = CLIENTID
                 optlen    = 14
                 \duid      \
                  |###[ DUID - Link-layer address plus time ]### 
                  |  type      = Link-layer address plus time
                  |  hwtype    = Ethernet (10Mb)
   

In [92]:
d = datetime.datetime.strptime("03/07/2017 08:55:58", "%d/%m/%Y %H:%M:%S")

In [98]:
p[Ether].time

1499082958.598308

In [91]:
p.time

1499082958.598308

In [93]:
d.timestamp()

1499108158.0

In [49]:
ipdict['8.254.250.126-192.168.10.5-80-49188-6-3/7/2017 8:55']

KeyError: '192.168.10.5-8.254.250.126-80-49188-6-3/7/2017 8:55'