# ANALYZING PCAP DATA derived from WIRESHARK

Data downloaded from http://malware-traffic-analysis.net/2020/02/21/page2.html

# Install Libraries

In [1]:
#pip install scapy

In [2]:
from scapy.all import rdpcap
import socket
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import nltk
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
init_notebook_mode(connected=True)
cf.go_offline()

# Load Data

In [3]:
pkts_list = rdpcap('traffic-analysis.pcap')

In [4]:
len(pkts_list)
print('There are', len(pkts_list), 'rows including packets')

There are 9698 rows including packets


# Sample Packet Information

In [5]:
#Sample row info:

pkts_list[12].show()

###[ Ethernet ]### 
  dst       = a4:1f:72:c2:09:6a
  src       = 00:11:75:8c:fd:47
  type      = IPv4
###[ IP ]### 
     version   = 4
     ihl       = 5
     tos       = 0x0
     len       = 254
     id        = 17120
     flags     = 
     frag      = 0
     ttl       = 128
     proto     = udp
     chksum    = 0x8e36
     src       = 172.17.8.174
     dst       = 172.17.8.8
     \options   \
###[ UDP ]### 
        sport     = 62496
        dport     = 389
        len       = 234
        chksum    = 0xd425
###[ Raw ]### 
           load      = '0\x84\x00\x00\x00\xdc\x02\x01\x01c\x84\x00\x00\x00\xd3\x04\x00\n\x01\x00\n\x01\x00\x02\x01\x00\x02\x01\x00\x01\x01\x00\xa0\x84\x00\x00\x00\xac\xa3\x84\x00\x00\x00\x1d\x04\tDnsDomain\x04\x10one-hot-mess.com\xa3\x84\x00\x00\x00\x17\x04\x04Host\x04\x0fDESKTOP-TZMKHKC\xa3\x84\x00\x00\x00\x1e\x04\nDomainGuid\x04\x10\xc8\x7fl\x92@r\x9eM\x9aU[\xc0\xc7\xf4)\x80\xa3\x84\x00\x00\x00\r\x04\x05NtVer\x04\x04\x16\x00\x00 \xa3\x84\x00\x00\x00/\x04\x0bDnsHos

# Pcap IP Information

In [6]:
pkts_list[12]['IP'].src

'172.17.8.174'

In [7]:
pkts_list[12]['IP'].dst

'172.17.8.8'

In [8]:
pkts_list[12]['IP'].sport

62496

# Create Data Frame for IP Info

In [10]:
Source_IPs = []
Dest_IPs = []

for i in range (0,len(pkts_list)):
    Source_IPs.append(pkts_list[i]['IP'].src)
    Dest_IPs.append(pkts_list[i]['IP'].dst)

In [11]:
dict = {'Source_IPs': Source_IPs, 'Dest_IPs': Dest_IPs}  
Pcap_Data = pd.DataFrame(dict)
Pcap_Data.head()
#or
#Pcap_Data.sort_values("Source_IPs", ascending=False)

Unnamed: 0,Source_IPs,Dest_IPs
0,0.0.0.0,255.255.255.255
1,172.17.8.8,172.17.8.174
2,172.17.8.174,224.0.0.22
3,172.17.8.174,224.0.0.22
4,172.17.8.174,224.0.0.251


In [17]:
#you can drop local IPs
Pcap_Data.drop(Pcap_Data[Pcap_Data['Dest_IPs'] == '10.0.0.10'].index , inplace=True)
Pcap_Data.drop(Pcap_Data[Pcap_Data['Dest_IPs'] == '10.0.0.167'].index , inplace=True)
Pcap_Data.drop(Pcap_Data[Pcap_Data['Dest_IPs'] == '10.0.0.202'].index , inplace=True)
Pcap_Data.drop(Pcap_Data[Pcap_Data['Dest_IPs'] == '255.255.255.255'].index , inplace=True)

# Count Unique Destination IPs

In [18]:
from collections import Counter
#Count Unique IPs by generating a dict file
Unique_Dest_IPs = Counter(Pcap_Data['Dest_IPs'])

Unique_Dest_IPs is a Dictionary File

# Create Data Frame to see Unique IP Frequencies

In [19]:
IP_Freq_df = pd.DataFrame(list(Unique_Dest_IPs.items()), columns=['IPs', 'Frequency']).sort_values("Frequency", ascending=False)

In [20]:
IP_Freq_df.head()

Unnamed: 0,IPs,Frequency
0,172.17.8.174,6527
4,172.17.8.8,725
34,205.185.216.42,671
22,23.54.20.119,387
35,205.185.216.10,278


In [21]:
print("There are", len(IP_Freq_df['IPs']), 'Unique IP Addresses') 

There are 38 Unique IP Addresses


# Try to Get Host info by IP

In [24]:
socket.gethostbyaddr(IP_Freq_df['IPs'][22])

('a23-54-20-119.deploy.static.akamaitechnologies.com', [], ['23.54.20.119'])

In [25]:
IP_to_Host = []

for i in IP_Freq_df['IPs'][:len(IP_Freq_df['IPs'])]:
    try:
        IP_to_Host.append(socket.gethostbyaddr(i)[0])
    except: 
        IP_to_Host.append("Unable to get Hostname")

In [26]:
IP_to_Host[:5]

['Unable to get Hostname',
 'Unable to get Hostname',
 'map2.hwcdn.net',
 'a23-54-20-119.deploy.static.akamaitechnologies.com',
 'map2.hwcdn.net']

In [27]:
IP_to_Host_df = pd.DataFrame({"IP_to_Host":IP_to_Host})

# Concat IP Info to Data Frame

In [28]:
Result = pd.concat([IP_Freq_df, IP_to_Host_df], axis=1).sort_values("Frequency", ascending=False)

In [29]:
Result

Unnamed: 0,IPs,Frequency,IP_to_Host
0,172.17.8.174,6527,Unable to get Hostname
4,172.17.8.8,725,map2.hwcdn.net
34,205.185.216.42,671,a23-1-236-114.deploy.static.akamaitechnologies...
22,23.54.20.119,387,Unable to get Hostname
35,205.185.216.10,278,a23-1-236-125.deploy.static.akamaitechnologies...
24,91.211.88.122,243,Unable to get Hostname
20,64.4.54.18,94,Unable to get Hostname
8,204.79.197.200,80,Unable to get Hostname
23,13.107.3.128,75,Unable to get Hostname
14,49.51.172.56,60,Unable to get Hostname


In [30]:
Result[:25].iplot( kind="bar", x='IPs', title="Destination_IPs",  text='IP_to_Host')