In [2]:
import pandas as pd
        
from collections import Counter 
import operator

In [3]:
# --- Configuration ---------------------------------------------------------
data_file = 'capture20110815-2.pcap.netflow.labeled'
# CTU-Malware-Capture-Botnet-46
# - Infected hosts:
#     - 147.32.84.165: Windows XP English version. Name: SARUMAN. Label: Botnet. Amount of bidirectional flows: 1802
# - Normal hosts:
#     - 147.32.84.170 (amount of bidirectional flows: 3620, Label: Normal-V42-Stribrek)
#     - 147.32.84.134 (amount of bidirectional flows: 2214, Label: Normal-V42-Jist)
#     - 147.32.84.164 (amount of bidirectional flows: 3444, Label: Normal-V42-Grill)
#     - 147.32.87.36 (amount of bidirectional flows: 28, Label: CVUT-WebServer. This normal host is not so reliable since it is a web server)
#     - 147.32.80.9 (amount of bidirectional flows: 10, Label: CVUT-DNS-Server. This normal host is not so reliable since it is a DNS server)
#     - 147.32.87.11 (amount of bidirectional flows: 11, Label: MatLab-Server. This normal host is not so reliable since it is a MATLAB server)
infected = '147.32.84.165'
n = 10
k_s = [100, 1000, 5000]
columns=['Date','Duration','Protocol','SrcIP', 'srcPort','DstIP', 'DstPort', 'Flags', 'TOS', 'Packets', 'Bytes', 'Label']


def _split_endpoint(endpoint):
    """Split an ``ip:port`` field into an ``(ip, port)`` pair.

    When the field contains no ``:`` the port defaults to the integer ``0``.
    NOTE(review): ports that are present stay strings, so the resulting
    column mixes ``str`` and ``int`` values; kept as-is so that any existing
    comparison against ``0`` keeps working.
    """
    parts = endpoint.split(':')
    if len(parts) < 2:  # Set port to 0 if no port provided
        parts.append(0)
    return parts[0], parts[1]


# --- Load the labeled flow file -----------------------------------------------
# Every line after the header is split on tabs; empty fields produced by
# consecutive tabs are dropped before the positional lookups below.
lst=[]
with open(data_file) as fp:
    next(fp, None)  # consume the header line once instead of testing every row
    for line in fp:
        if not line.strip():
            # A blank (e.g. trailing) line has no fields; skip it instead of
            # failing with IndexError on the positional lookups.
            continue
        data = list(filter(None, line.split("\t")))
        src_ip, src_port = _split_endpoint(data[3])
        # Index 4 is not read (presumably the direction marker between the
        # two endpoints -- confirm against the file header).
        dst_ip, dst_port = _split_endpoint(data[5])
        # Index 10 is not stored; the frame has no column for it.
        lst.append([
            data[0],            # Date
            data[1],            # Duration
            data[2],            # Protocol
            src_ip, src_port,
            dst_ip, dst_port,
            data[6],            # Flags
            data[7],            # TOS
            data[8],            # Packets
            data[9],            # Bytes
            data[11].rstrip(),  # Label, without the trailing newline
        ])
dataset=pd.DataFrame(lst, columns=columns)
dataset.head()

Unnamed: 0,Date,Duration,Protocol,SrcIP,srcPort,DstIP,DstPort,Flags,TOS,Packets,Bytes,Label
0,2011-08-15 16:43:20.931,0.235,TCP,147.32.84.59,64131,164.8.32.159,54992,PA_,0,3,211,Background
1,2011-08-15 16:43:20.933,0.444,TCP,147.32.87.252,49218,147.32.80.13,3128,FPA_,0,300,19566,LEGITIMATE
2,2011-08-15 16:43:20.933,4.993,TCP,41.143.58.184,1328,147.32.86.179,58675,PA_,0,269,29373,Background
3,2011-08-15 16:43:20.933,0.187,TCP,147.32.86.155,51273,204.154.94.81,80,FA_,0,2,132,Background
4,2011-08-15 16:43:20.933,0.442,TCP,147.32.80.13,3128,147.32.87.252,49218,FPA_,0,579,728343,LEGITIMATE


In [14]:
# Counting IPs in the dataset.
# Counter tallies each column in a single pass over its values, which avoids
# building a Series per row the way DataFrame.iterrows() does. Keys are still
# inserted in order of first appearance, so later stable sorts break ties
# exactly as before. The results are converted back to plain dicts to keep the
# same types as the original hand-written loops.
src_ip_map = dict(Counter(dataset["SrcIP"]))
dst_ip_map = dict(Counter(dataset["DstIP"]))

# Counter addition sums the per-IP counts of both directions.
combined_map = Counter(src_ip_map) + Counter(dst_ip_map)


In [22]:
# Print the ten highest counts from each tally, largest first.
# sorted() is stable, so IPs with equal counts keep their insertion order.
by_count = operator.itemgetter(1)
reports = (
    ("10 most abundant senders", src_ip_map),
    ("10 most abundant receivers", dst_ip_map),
    ("Combined:", combined_map),
)
for heading, tally in reports:
    print(heading)
    ranked = sorted(tally.items(), key=by_count, reverse=True)
    print(dict(ranked[:10]))

10 most abundant senders
{'147.32.84.229': 76790, '147.32.80.9': 53721, '147.32.84.59': 46028, '147.32.84.138': 20674, '147.32.84.118': 8113, '147.32.85.7': 6141, '147.32.85.25': 3929, '147.32.86.20': 3806, '147.32.84.164': 3622, '147.32.84.170': 3246}
10 most abundant receivers
{'147.32.84.229': 73235, '147.32.80.9': 53845, '147.32.84.59': 36631, '147.32.84.138': 20737, '147.32.84.118': 5567, '147.32.85.7': 5298, '147.32.80.13': 4174, '147.32.85.25': 3932, '147.32.84.164': 3214, '147.32.85.34': 2723}
Combined:
{'147.32.84.229': 150025, '147.32.80.9': 107566, '147.32.84.59': 82659, '147.32.84.138': 41411, '147.32.84.118': 13680, '147.32.85.7': 11439, '147.32.85.25': 7861, '147.32.80.13': 6891, '147.32.84.164': 6836, '147.32.84.170': 5458}
