# NXDomain Counts

DNS Response Code 3 (RCode) indicates "Domain name does not exist".  As the name suggests, this is present when a host will attempt to resolve a Domain name address that does not exist.  This may be an indicator that a host is using DGA.

## Hypothesis:
Clusters of users within the same department (e.g. Account) will tend to have a similar NXDomain response code count for a period of time.  If a user is generating an abnormal amount of NXDomain response codes, this is a potential indicator of compromise. 

## Datasets:





In [10]:
import os
import pandas as pd
import numpy as np
import datetime

In [32]:
pd.options.display.max_columns = None

In [22]:
dns_df = pd.read_csv('./dataset/dns.log', delimiter='\t', skiprows=[0,1,2,3,4,6,7], skipfooter=1, engine='python')
dns_df.columns = ['ts', 'uid', 'id.orig_h', 'id.resp_h', 'id.resp_p', 'proto', 'trans_id', 'query', 'qclass', 'qclass_name', 'qtype', 'qtype_name', 'rcode', 'rcode_name', 'AA', 'TC', 'RD', 'RA', 'Z', 'answersTTLS', 'rejected', 'not_req', 'not_req2']

dns_df['ts'] = pd.to_datetime(dns_df['ts'], unit='s')
dns_df['ts'] = dns_df['ts'].dt.date

dns_df = dns_df[['ts', 'id.orig_h', 'AA']]
dns_df.columns = ['date', 'srcip', 'response']




Unnamed: 0,ts,uid,id.orig_h,id.resp_h,id.resp_p,proto,trans_id,query,qclass,qclass_name,...,rcode_name,AA,TC,RD,RA,Z,answersTTLS,rejected,not_req,not_req2
64,2012-03-16,CsH94z3UV9RTfm3mci,192.168.202.89,57144,192.168.207.4,53,udp,51018,cwepo.ccbc.ccbcmd.edu,1,...,3,NXDOMAIN,F,F,T,F,0,-,-,F
66,2012-03-16,CURJ7JHs7cNX3UrS7,192.168.202.79,58115,192.168.207.4,53,udp,1,5ef35ade37119963da199c876819e36dc570413b.certs...,1,...,3,NXDOMAIN,F,F,T,F,0,-,-,F
78,2012-03-16,CJ9Mgk4NAzF0Dgz1Jl,192.168.202.87,42725,192.168.207.4,53,udp,43028,api.screenname.aol.com,1,...,3,NXDOMAIN,F,F,T,F,0,-,-,F
79,2012-03-16,CI9E7rHTu01lWjQe9,192.168.202.87,52891,192.168.207.4,53,udp,21517,api.screenname.aol.com,1,...,3,NXDOMAIN,F,F,T,F,0,-,-,F
80,2012-03-16,CiZuYm2sSfglSqCKnc,192.168.202.87,56191,192.168.207.4,53,udp,62911,api.screenname.aol.com,1,...,3,NXDOMAIN,F,F,T,F,0,-,-,F
81,2012-03-16,CJjH3w3wGMtdnjluz,192.168.202.87,60440,192.168.207.4,53,udp,60105,api.screenname.aol.com,1,...,3,NXDOMAIN,F,F,T,F,0,-,-,F
82,2012-03-16,COb29Q28psooFL5NSb,192.168.202.87,42497,192.168.207.4,53,udp,11617,_xmpp-client._tcp.gmail.com,1,...,3,NXDOMAIN,F,F,T,F,0,-,-,F
83,2012-03-16,Cd00BL1YlcMtRTkzva,192.168.202.87,33154,192.168.207.4,53,udp,28064,gmail.com,1,...,3,NXDOMAIN,F,F,T,F,0,-,-,F
84,2012-03-16,CroNpv4fatVLSc0zRj,192.168.202.87,44779,192.168.207.4,53,udp,1832,gmail.com,1,...,3,NXDOMAIN,F,F,T,F,0,-,-,F
85,2012-03-16,CRfB2E4miSQRbf2plk,192.168.202.87,36776,192.168.207.4,53,udp,45798,gmail.com,1,...,3,NXDOMAIN,F,F,T,F,0,-,-,F


In [3]:
# Create Response Summary
uniqueHosts = set(dns_df['srcip'].values.tolist())
uniqueDates = set(dns_df['date'].values.tolist())

summary_df = pd.DataFrame()
temp_df = pd.DataFrame()

for date in uniqueDates:
    filtered_df = dns_df[dns_df['date'] == date]
    
    for ip in uniqueHosts:
        temp_df = filtered_df[filtered_df['srcip'] == ip]
        temp_df = temp_df['response'].value_counts().to_frame().transpose()
        temp_df['date'] = date
        temp_df['srcip'] = ip

        summary_df = pd.concat([temp_df, summary_df], sort=False)

# Standardise dataframe
summary_df = summary_df.fillna(0)
summary_df.reset_index(inplace=True)
summary_df = summary_df[['date', 'srcip', '-', 'NXDOMAIN', 'NOERROR', 'NXRRSet', 'REFUSED', 'NOTAUTH', 'NOTIMP', 'FORMERR', 'SERVFAIL','YXRRSET','YXDOMAIN','NOTZONE']]


In [42]:
# Calculate proportion of NXDomain lookups
summary_df['total'] = summary_df['-'] + summary_df['NXDOMAIN'] + summary_df['NOERROR'] 

summary_df = summary_df[summary_df['total'] > 0] 
summary_df['NXDomain Percentage'] = (summary_df['NXDOMAIN'] / summary_df['total']) * 100

display(summary_df)

Unnamed: 0,date,srcip,-,NXDOMAIN,NOERROR,NXRRSet,REFUSED,NOTAUTH,NOTIMP,FORMERR,SERVFAIL,YXRRSET,YXDOMAIN,NOTZONE,total,NXDomain Percentage
0,2012-03-16,192.168.202.116,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.000000
1,2012-03-16,192.168.202.133,107.0,72.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,179.0,40.223464
2,2012-03-16,192.168.202.122,129.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,129.0,0.000000
3,2012-03-16,10.10.10.10,69.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,69.0,0.000000
5,2012-03-16,2001:dbb:c18:203:226:18ff:fef9:be98,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.000000
6,2012-03-16,fe80::4c9b:aad8:8a6a:7bb0,224.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,224.0,0.000000
7,2012-03-16,192.168.202.98,108.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,108.0,0.000000
8,2012-03-16,fe80::223:dfff:fe97:4e12,21.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21.0,0.000000
15,2012-03-16,192.168.202.87,4396.0,304.0,23.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4723.0,6.436587
16,2012-03-16,192.168.0.199,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.000000


In [43]:
date1_df = summary_df[summary_df['date'] == datetime.date(year=2012,month=3,day=16)]
date2_df = summary_df[summary_df['date'] == datetime.date(year=2012,month=3,day=17)]


In [44]:
display(date1_df.sort_values(by=['NXDomain Percentage'], ascending=False))

Unnamed: 0,date,srcip,-,NXDOMAIN,NOERROR,NXRRSet,REFUSED,NOTAUTH,NOTIMP,FORMERR,SERVFAIL,YXRRSET,YXDOMAIN,NOTZONE,total,NXDomain Percentage
247,2012-03-16,192.168.204.69,0.0,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,100.000000
249,2012-03-16,192.168.203.62,0.0,736.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,742.0,99.191375
109,2012-03-16,192.168.202.63,0.0,139.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,144.0,96.527778
138,2012-03-16,192.168.203.61,32.0,549.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,589.0,93.208829
106,2012-03-16,192.168.202.81,1.0,14.0,1.0,0.0,32.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,16.0,87.500000
218,2012-03-16,192.168.202.101,266.0,251.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,517.0,48.549323
236,2012-03-16,192.168.204.70,2813.0,2082.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4899.0,42.498469
1,2012-03-16,192.168.202.133,107.0,72.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,179.0,40.223464
92,2012-03-16,192.168.202.86,0.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,40.000000
58,2012-03-16,192.168.202.74,0.0,4.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,40.000000


In [20]:
display(date2_df)

Unnamed: 0,date,srcip,-,NXDOMAIN,NOERROR,NXRRSet,REFUSED,NOTAUTH,NOTIMP,FORMERR,SERVFAIL,YXRRSET,YXDOMAIN,NOTZONE,total,NXDomain Percentage
254,2012-03-17,192.168.202.133,0.0,650.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,650.0,100.000000
255,2012-03-17,192.168.202.122,113.0,10.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,124.0,8.064516
257,2012-03-17,192.168.26.254,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.000000
258,2012-03-17,2001:dbb:c18:203:226:18ff:fef9:be98,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.000000
259,2012-03-17,fe80::4c9b:aad8:8a6a:7bb0,44.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,44.0,0.000000
260,2012-03-17,192.168.202.98,0.0,332.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,332.0,100.000000
261,2012-03-17,fe80::223:dfff:fe97:4e12,53.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53.0,0.000000
262,2012-03-17,192.168.27.101,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.000000
263,2012-03-17,2001:dbb:c18:202:bc5c:15c1:ec81:1e08,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.000000
264,2012-03-17,192.168.25.203,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.000000


In [49]:
dns_df = pd.read_csv('./dataset/dns.log', delimiter='\t', skiprows=[0,1,2,3,4,6,7], skipfooter=1, engine='python')
dns_df.columns = ['ts', 'uid', 'id.orig_h', 'id.resp_h', 'id.resp_p', 'proto', 'trans_id', 'query', 'qclass', 'qclass_name', 'qtype', 'qtype_name', 'rcode', 'rcode_name', 'AA', 'TC', 'RD', 'RA', 'Z', 'answersTTLS', 'rejected', 'not_req', 'not_req2']

dns_df['ts'] = pd.to_datetime(dns_df['ts'], unit='s')
dns_df['ts'] = dns_df['ts'].dt.date

dns_df = dns_df[dns_df.AA.str.contains('NXDOMAIN')]
dns_df = dns_df[dns_df['id.orig_h'] == '192.168.202.79']
display(dns_df[['ts', 'AA', 'id.orig_h', 'qclass']])


Unnamed: 0,ts,AA,id.orig_h,qclass
66,2012-03-16,NXDOMAIN,192.168.202.79,5ef35ade37119963da199c876819e36dc570413b.certs...
361,2012-03-16,NXDOMAIN,192.168.202.79,www.metasploit.com
362,2012-03-16,NXDOMAIN,192.168.202.79,www.metasploit.com
363,2012-03-16,NXDOMAIN,192.168.202.79,www.metasploit.com
364,2012-03-16,NXDOMAIN,192.168.202.79,www.metasploit.com
1691,2012-03-16,NXDOMAIN,192.168.202.79,79.202.168.192.in-addr.arpa
1724,2012-03-16,NXDOMAIN,192.168.202.79,creativecommons.org
1725,2012-03-16,NXDOMAIN,192.168.202.79,www.dokuwiki.org
1726,2012-03-16,NXDOMAIN,192.168.202.79,creativecommons.org
1727,2012-03-16,NXDOMAIN,192.168.202.79,dokuwiki.org
