## Requirements:

In [3]:
import dateutil.parser as parser
import json
import pandas as pd
import requests

## Data processing:

In [4]:
data = pd.read_csv("queries.txt", sep=" ", header=None)

In [5]:
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,18-May-2021,16:34:13.003,queries:,info:,client,@0x55adcc672cc0,45.231.61.2#80,(pizzaseo.com):,query:,pizzaseo.com,IN,ANY,+E(0),(172.20.101.44)
1,18-May-2021,16:34:13.008,queries:,info:,client,@0x55adcd2227e0,190.6.62.130#57418,(ctldl.windowsupdate.com):,query:,ctldl.windowsupdate.com,IN,A,+E(0)D,(172.20.101.44)
2,18-May-2021,16:34:13.009,queries:,info:,client,@0x55adcc672cc0,45.231.61.2#80,(pizzaseo.com):,query:,pizzaseo.com,IN,ANY,+E(0),(172.20.101.44)
3,18-May-2021,16:34:13.009,queries:,info:,client,@0x55adcc672cc0,45.231.61.2#80,(pizzaseo.com):,query:,pizzaseo.com,IN,ANY,+E(0),(172.20.101.44)
4,18-May-2021,16:34:13.009,queries:,info:,client,@0x55adcd2227e0,111.90.159.121#80,(pizzaseo.com):,query:,pizzaseo.com,IN,ANY,+E(0),(172.20.101.44)


In [6]:
data.columns

Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], dtype='int64')

Getting data time:

In [7]:
data_time = pd.concat([data[0], data[1]], axis=1)

Transform data time to timestamp:

In [8]:
timestamp = data_time[0] + " " + data_time[1] # Serie
timestamp = pd.DataFrame(timestamp)
timestamp

Unnamed: 0,0
0,18-May-2021 16:34:13.003
1,18-May-2021 16:34:13.008
2,18-May-2021 16:34:13.009
3,18-May-2021 16:34:13.009
4,18-May-2021 16:34:13.009
...,...
16962,18-May-2021 16:35:16.590
16963,18-May-2021 16:35:16.601
16964,18-May-2021 16:35:16.603
16965,18-May-2021 16:35:16.603


Getting data name:

In [9]:
name = data[9]
name

0                                             pizzaseo.com
1                                  ctldl.windowsupdate.com
2                                             pizzaseo.com
3                                             pizzaseo.com
4                                             pizzaseo.com
                               ...                        
16962                                                   sl
16963    a-ups-presence3-prod-azsc.westus2.cloudapp.azu...
16964                          10.210.250.142.in-addr.arpa
16965                      content-autofill.googleapis.com
16966         asm-api-prod-geo-am-skype.trafficmanager.net
Name: 9, Length: 16967, dtype: object

Getting data client IP:

In [10]:
client_ip = data[6].str.split("#", expand=True,)[:][0]
client_ip

0           45.231.61.2
1          190.6.62.130
2           45.231.61.2
3           45.231.61.2
4        111.90.159.121
              ...      
16962    75.133.131.196
16963    200.10.221.112
16964     200.48.13.150
16965    200.188.48.206
16966    190.242.62.142
Name: 0, Length: 16967, dtype: object

Getting data client name:

In [11]:
client_name = data[9]
client_name

0                                             pizzaseo.com
1                                  ctldl.windowsupdate.com
2                                             pizzaseo.com
3                                             pizzaseo.com
4                                             pizzaseo.com
                               ...                        
16962                                                   sl
16963    a-ups-presence3-prod-azsc.westus2.cloudapp.azu...
16964                          10.210.250.142.in-addr.arpa
16965                      content-autofill.googleapis.com
16966         asm-api-prod-geo-am-skype.trafficmanager.net
Name: 9, Length: 16967, dtype: object

Getting data type:

In [12]:
type = data[11]
type

0        ANY
1          A
2        ANY
3        ANY
4        ANY
        ... 
16962    ANY
16963      A
16964    PTR
16965      A
16966      A
Name: 11, Length: 16967, dtype: object

## Statistics:

In [13]:
print("Total records:", len(data))

Total records: 16967


Client IPs Rank:

In [14]:
print("Client IPs Rank:\n\n", client_ip.value_counts(normalize=True).mul(100).round(2).astype(str) + '%')

Client IPs Rank:

 111.90.159.121     19.89%
45.231.61.2         7.37%
187.45.191.2        6.42%
190.217.123.244     4.35%
5.63.14.45          3.74%
                    ...  
24.103.158.162      0.01%
157.100.50.149      0.01%
201.218.17.234      0.01%
138.122.26.55       0.01%
34.68.43.67         0.01%
Name: 0, Length: 263, dtype: object


Host Rank:

In [15]:
print("Host Rank:\n\n", name.value_counts(normalize=True).mul(100).round(2).astype(str) + '%')

Host Rank:

 pizzaseo.com                                    27.26%
sl                                              20.09%
MNZ-efz.ms-acdc.office.com                       0.39%
global.asimov.events.data.trafficmanager.net     0.18%
www.google.com                                   0.18%
                                                 ...  
ads.yieLdmO.CoM                                  0.01%
m16.fysauey.me                                   0.01%
trends.google.com                                0.01%
cat.va1.vip.prod.criteo.com                      0.01%
b._dns-sd._udp.0.0.0.10.in-addr.arpa             0.01%
Name: 9, Length: 5095, dtype: object


## Request data:

Data request:

In [16]:
CLIENT_KEY = "d39a0f19-7278-4a64-a255-b7646d1ace80"
COLLECTOR_ID = "5ab55d08-ae72-4017-a41c-d9d735360288"
main_url = "https://api.lumu.io"
headers = {'content-type': "application/json"}

In [17]:
# Verifyng server response
response = requests.get(main_url)
response, response.text

(<Response [200]>, 'OK')

In [18]:
url_queries = main_url + "/collectors/" + COLLECTOR_ID + "/dns/queries?key=" + CLIENT_KEY
url_queries

'https://api.lumu.io/collectors/5ab55d08-ae72-4017-a41c-d9d735360288/dns/queries?key=d39a0f19-7278-4a64-a255-b7646d1ace80'

Data processing:

In [19]:
new_data = pd.concat([timestamp, name, client_ip, client_name, type], axis=1, ignore_index=True)
new_data.columns = ['timestamp', 'name', 'client_ip', 'client_name', 'type']
new_data

Unnamed: 0,timestamp,name,client_ip,client_name,type
0,18-May-2021 16:34:13.003,pizzaseo.com,45.231.61.2,pizzaseo.com,ANY
1,18-May-2021 16:34:13.008,ctldl.windowsupdate.com,190.6.62.130,ctldl.windowsupdate.com,A
2,18-May-2021 16:34:13.009,pizzaseo.com,45.231.61.2,pizzaseo.com,ANY
3,18-May-2021 16:34:13.009,pizzaseo.com,45.231.61.2,pizzaseo.com,ANY
4,18-May-2021 16:34:13.009,pizzaseo.com,111.90.159.121,pizzaseo.com,ANY
...,...,...,...,...,...
16962,18-May-2021 16:35:16.590,sl,75.133.131.196,sl,ANY
16963,18-May-2021 16:35:16.601,a-ups-presence3-prod-azsc.westus2.cloudapp.azu...,200.10.221.112,a-ups-presence3-prod-azsc.westus2.cloudapp.azu...,A
16964,18-May-2021 16:35:16.603,10.210.250.142.in-addr.arpa,200.48.13.150,10.210.250.142.in-addr.arpa,PTR
16965,18-May-2021 16:35:16.603,content-autofill.googleapis.com,200.188.48.206,content-autofill.googleapis.com,A


In [20]:
data_json = json.loads(new_data.to_json(orient='records'))

In [21]:
size_of_the_split = 500
total = int(len(data_json) / size_of_the_split)
size_of_the_split, total

(500, 33)

In [None]:
for i in range(total+1):
  put_data = data_json[i * size_of_the_split:(i + 1) * size_of_the_split]
  response = requests.request("POST", url_queries, json=put_data, headers=headers)
  response
  print(response, i)

In [48]:
put_data

[{'client_ip': '76.236.252.119',
  'client_name': 'c.bing.com',
  'name': 'c.bing.com',
  'timestamp': '18-May-2021 16:34:34.162',
  'type': 'A'},
 {'client_ip': '76.236.252.119',
  'client_name': 'c-msn-com-nsatc.trafficmanager.net',
  'name': 'c-msn-com-nsatc.trafficmanager.net',
  'timestamp': '18-May-2021 16:34:34.162',
  'type': 'A'},
 {'client_ip': '76.236.252.119',
  'client_name': 'img-s-msn-com.akamaized.net',
  'name': 'img-s-msn-com.akamaized.net',
  'timestamp': '18-May-2021 16:34:34.162',
  'type': 'A'},
 {'client_ip': '45.238.197.2',
  'client_name': '252.93.13.96.in-addr.arpa',
  'name': '252.93.13.96.in-addr.arpa',
  'timestamp': '18-May-2021 16:34:34.165',
  'type': 'PTR'},
 {'client_ip': '45.238.197.2',
  'client_name': '162.69.178.63.in-addr.arpa',
  'name': '162.69.178.63.in-addr.arpa',
  'timestamp': '18-May-2021 16:34:34.165',
  'type': 'PTR'},
 {'client_ip': '45.238.197.2',
  'client_name': '158.129.152.165.in-addr.arpa',
  'name': '158.129.152.165.in-addr.arpa',

In [23]:
203217 + 16967

220184

In [24]:
data_json[0]

{'client_ip': '45.231.61.2',
 'client_name': 'pizzaseo.com',
 'name': 'pizzaseo.com',
 'timestamp': '18-May-2021 16:34:13.003',
 'type': 'ANY'}

In [68]:
obj = [
  {
    "timestamp": "2021-01-06T14:37:02.228Z",
    "name": "www.example.com",
    "client_ip": "192.168.0.103",
    "client_name": "MACHINE-0987",
    "type": "A"
  },
  {'client_ip': '186.96.21.146',
  'client_name': 'client.teamviewer.com',
  'name': 'client.teamviewer.com',
  'timestamp': '18-May-2021 16:34:36.167',
  'type': 'A'
  }
]

In [69]:
resp = requests.request("POST", url_queries, json=obj, headers=headers)

In [70]:
resp

<Response [400]>

In [40]:
a = "[" + json.dumps(data_json[0]) + "]"

In [42]:
json.loads(a)

[{'client_ip': '45.231.61.2',
  'client_name': 'pizzaseo.com',
  'name': 'pizzaseo.com',
  'timestamp': '18-May-2021 16:34:13.003',
  'type': 'ANY'}]