# Basic Analysis of Network Traffic Traces

In [1]:
import pandas as pd
from datetime import datetime, timezone

from data_collection.parse_pcap import pcap_to_pandas, send_rates

## Load a Packet Capture into Pandas

In [2]:
# Insert your own packet capture here.
capture_path = '/tmp/example-20200523.pcap'

# Parse the capture into a DataFrame and preview the first few rows.
pcap = pcap_to_pandas(capture_path)
pcap.head(4)

Unnamed: 0,datetime,dns_query,dns_resp,ip_dst,ip_dst_int,ip_src,ip_src_int,is_dns,length,mac_dst,mac_dst_int,mac_src,mac_src_int,port_dst,port_src,protocol,time,time_normed
0,2020-05-23 21:47:42,,,204.80.104.218,3427822000.0,192.168.1.13,3232236000.0,False,1291,08:02:8e:92:27:27,8807074899751,3c:15:c2:d9:d3:50,66064161035088,8801,54012,UDP,1590288462.075696,0.0
1,2020-05-23 21:47:42,,,204.80.104.218,3427822000.0,192.168.1.13,3232236000.0,False,1291,08:02:8e:92:27:27,8807074899751,3c:15:c2:d9:d3:50,66064161035088,8801,54012,UDP,1590288462.075697,1e-06
2,2020-05-23 21:47:42,,,192.168.1.13,3232236000.0,204.80.104.218,3427822000.0,False,747,3c:15:c2:d9:d3:50,66064161035088,08:02:8e:92:27:27,8807074899751,54012,8801,UDP,1590288462.080226,0.00453
3,2020-05-23 21:47:42,,,192.168.1.13,3232236000.0,204.80.104.218,3427822000.0,False,852,3c:15:c2:d9:d3:50,66064161035088,08:02:8e:92:27:27,8807074899751,54012,8801,UDP,1590288462.080228,0.004532


In [3]:
# (rows, columns) of the DataFrame returned by pcap_to_pandas.
pcap.shape

(3589, 18)

In [4]:
# Narrow the frame to the fields used below; the MAC-address, integer-encoded
# address and raw timestamp columns are dropped.
kept_columns = [
    'datetime', 'dns_query', 'dns_resp', 'ip_src', 'ip_dst',
    'is_dns', 'length', 'port_src', 'port_dst', 'protocol',
]
pcap = pcap.loc[:, kept_columns]

In [5]:
# Preview packets whose source address is 192.168.1.13.
sent_by_host = pcap['ip_src'] == '192.168.1.13'
pcap[sent_by_host].head(n=2)

Unnamed: 0,datetime,dns_query,dns_resp,ip_src,ip_dst,is_dns,length,port_src,port_dst,protocol
0,2020-05-23 21:47:42,,,192.168.1.13,204.80.104.218,False,1291,54012,8801,UDP
1,2020-05-23 21:47:42,,,192.168.1.13,204.80.104.218,False,1291,54012,8801,UDP


**Example:** Create a pandas Series containing the unique destination IP addresses.

In [6]:
# Distinct destination addresses, in order of first appearance in the trace.
unique_dst_ip = pd.Series(pcap['ip_dst'].unique(), name=0)
print(unique_dst_ip)

0      204.80.104.218
1        192.168.1.13
2         192.168.1.1
3        54.82.161.19
4       34.203.91.157
5       172.217.4.110
6        172.217.4.78
7        157.240.2.53
8         224.0.0.251
9     108.177.111.189
10      18.211.133.65
11     198.252.206.25
12        192.168.1.4
13               None
14        192.168.1.6
15      140.82.112.25
16       192.168.1.10
17      172.217.4.234
18        3.80.20.191
Name: 0, dtype: object


In [7]:
# Total packet length sent to each destination IP, largest first.
pkts_dst = pcap.loc[:, ['datetime', 'ip_dst', 'length']]

# Aggregate only `length`. Summing the non-numeric `datetime` column is
# meaningless, and pandas >= 2.0 no longer silently drops such columns from
# groupby().sum() (it raises for datetimes or concatenates strings).
(
    pkts_dst.groupby('ip_dst')[['length']]
    .sum()
    .sort_values(by='length', ascending=False)
)

Unnamed: 0_level_0,length
ip_dst,Unnamed: 1_level_1
192.168.1.13,1409869
204.80.104.218,764918
172.217.4.110,2403
172.217.4.78,1915
224.0.0.251,1378
34.203.91.157,888
192.168.1.1,859
54.82.161.19,690
18.211.133.65,690
192.168.1.6,484


## Basic Analysis of Traffic Using Pandas

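Test reverse DNS (PTR) lookups on a couple of addresses, then define a reverse lookup function that maps an IP address to its hostname.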

In [8]:
from dns import resolver
from dns import reversename

# Smoke-test reverse DNS: fetch the PTR records for a sample address and
# print the first one.
addr = reversename.from_address('34.193.201.2')
ptr_records = resolver.query(addr, "PTR")
print(ptr_records[0])

ec2-34-193-201-2.compute-1.amazonaws.com.


In [9]:
# Repeat the PTR lookup for 204.80.104.218, the first destination address
# that appears in the capture.
addr = reversename.from_address('204.80.104.218')
first_ptr = resolver.query(addr, "PTR")[0]
print(first_ptr)

zoomnye218mmr.zoom.us.


In [10]:
# Memo of lookup results keyed by the address text, so that applying
# reverse_lookup to a per-packet column costs one DNS query per distinct
# address instead of one per row. Failures are memoised too; call
# _PTR_CACHE.clear() (or re-run this cell) to force fresh queries.
_PTR_CACHE = {}


def reverse_lookup(ip):
    """Return the reverse-DNS (PTR) name for an IP address.

    Parameters
    ----------
    ip : str or None
        Address in text form. ``None`` (or the string ``'None'``) marks a
        packet without a destination address.

    Returns
    -------
    str
        ``'None'`` when ``ip`` is missing, the first PTR record as text when
        the lookup succeeds, and ``'N/A'`` when the address cannot be parsed
        or the lookup fails for any reason.
    """
    key = str(ip)
    if key == 'None':
        return 'None'

    if key not in _PTR_CACHE:
        try:
            # Parsing happens inside the try so that a malformed address
            # yields 'N/A' instead of aborting a whole Series.apply().
            addr = reversename.from_address(ip)
            # dnspython 2.x deprecates query() in favour of resolve();
            # fall back to query() on installs that predate resolve().
            lookup = getattr(resolver, 'resolve', None) or resolver.query
            _PTR_CACHE[key] = str(lookup(addr, "PTR")[0])
        except Exception:
            # Deliberately best-effort: missing PTR records, timeouts and
            # bad input all collapse to the same placeholder.
            _PTR_CACHE[key] = 'N/A'

    return _PTR_CACHE[key]

Use the pandas `apply` function to create a new column with the DNS name associated with each destination.

Then look at the unique destination names in the trace.

In [11]:
# Resolve each packet's destination address to a name (reverse_lookup is
# invoked once per row) and store the result as a new column.
dst_names = pcap['ip_dst'].apply(reverse_lookup)
pcap['name_dst'] = dst_names

In [12]:
# Distinct resolved destination names, in order of first appearance.
unique_dst_name = pd.Series(pcap['name_dst'].unique(), name=0)
print(unique_dst_name)

0                         zoomnye218mmr.zoom.us.
1                                            N/A
2      ec2-54-82-161-19.compute-1.amazonaws.com.
3     ec2-34-203-91-157.compute-1.amazonaws.com.
4                     ord36s04-in-f14.1e100.net.
5                     ord37s18-in-f14.1e100.net.
6            whatsapp-cdn-shv-01-ort2.fbcdn.net.
7     ec2-18-211-133-65.compute-1.amazonaws.com.
8                             stackoverflow.com.
9                                           None
10              lb-140-82-112-25-iad.github.com.
11                   ord30s31-in-f234.1e100.net.
12      ec2-3-80-20-191.compute-1.amazonaws.com.
Name: 0, dtype: object


Write functions that sum the `length` field per destination, so that we can see how much total traffic (in bytes) is sent to each destination, either by IP address or by name.

In [13]:
def volume_stats_by_ip(pcap):
    """Sum packet lengths per destination IP address, largest total first.

    Parameters
    ----------
    pcap : pandas.DataFrame
        Packet table with at least the columns ``ip_dst`` and ``length``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``ip_dst`` with a single ``length`` column holding the
        summed length, sorted in descending order. Rows whose ``ip_dst`` is
        missing are left out by pandas' default groupby behaviour.
    """
    dst_lengths = pcap.loc[:, ['ip_dst', 'length']]
    bytes_per_ip = dst_lengths.groupby('ip_dst').sum()
    return bytes_per_ip.sort_values(by=['length'], ascending=False)


def volume_stats_by_name(pcap):
    """Sum packet lengths per destination name, largest total first.

    Parameters
    ----------
    pcap : pandas.DataFrame
        Packet table with at least the columns ``name_dst`` and ``length``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``name_dst`` with a single ``length`` column holding the
        summed length, sorted in descending order.
    """
    name_lengths = pcap.loc[:, ['name_dst', 'length']]
    bytes_per_name = name_lengths.groupby('name_dst').sum()
    return bytes_per_name.sort_values(by=['length'], ascending=False)

In [14]:
# Summed packet length per destination IP across the capture, largest first.
volume_stats_by_ip(pcap)

Unnamed: 0_level_0,length
ip_dst,Unnamed: 1_level_1
192.168.1.13,1409869
204.80.104.218,764918
172.217.4.110,2403
172.217.4.78,1915
224.0.0.251,1378
34.203.91.157,888
192.168.1.1,859
54.82.161.19,690
18.211.133.65,690
192.168.1.6,484


In [15]:
# Same totals keyed by resolved name. Every address whose lookup returned
# 'N/A' shares one row, as does every packet whose name is 'None'.
volume_stats_by_name(pcap)

Unnamed: 0_level_0,length
name_dst,Unnamed: 1_level_1
,1413206
zoomnye218mmr.zoom.us.,764918
ord36s04-in-f14.1e100.net.,2403
ord37s18-in-f14.1e100.net.,1915
ec2-34-203-91-157.compute-1.amazonaws.com.,888
ec2-18-211-133-65.compute-1.amazonaws.com.,690
ec2-54-82-161-19.compute-1.amazonaws.com.,690
,295
whatsapp-cdn-shv-01-ort2.fbcdn.net.,163
lb-140-82-112-25-iad.github.com.,160
