In [1]:
import json
import pandas as pd
from typing import List, Dict

Honeytrap

In [2]:
def parse_honey_logs(logs: str) -> pd.DataFrame:
    """Flatten newline-delimited JSON honeypot records into a DataFrame.

    Every non-blank line of ``logs`` is parsed as one JSON object. A fixed set
    of fields is copied into the row (``dest_port`` is exposed as ``dst_port``
    and the ``user-agent*`` keys as ``user_agent*``), and every key starting
    with ``header_`` is carried over unchanged as its own column.

    Args:
        logs: Text holding one JSON object per line. Blank lines are ignored.

    Returns:
        One row per record. ``timestamp`` is converted with ``pd.to_datetime``
        and ``trapped`` is converted to booleans (unrecognised values become
        NaN). Empty input yields an empty frame that still has the fixed
        columns.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the required (non user-agent) keys.
    """
    fixed_columns = [
        'timestamp', 'src_ip', 'dst_port', 'hostname', 'protocol',
        'request_method', 'trapped', 'user_agent', 'user_agent_browser',
        'user_agent_browser_version', 'user_agent_os', 'user_agent_os_version',
    ]

    def _as_bool(value):
        # Accept both JSON booleans and their string spellings ("true"/"False"/...).
        return {'true': True, 'false': False}.get(str(value).strip().lower(), float('nan'))

    # Skip blank lines so a stray empty line or an empty file does not make json.loads fail.
    json_logs = [json.loads(line) for line in logs.split('\n') if line.strip()]
    if not json_logs:
        return pd.DataFrame(columns=fixed_columns)

    flat_data = []
    for log in json_logs:
        flat_log = {
            'timestamp': log['timestamp'],
            'src_ip': log['src_ip'],
            'dst_port': log['dest_port'],
            'hostname': log['hostname'],
            'protocol': log['protocol'],
            'request_method': log['request_method'],
            'trapped': log['trapped'],
            # The user-agent fields are optional and default to an empty string.
            'user_agent': log.get('user-agent', ''),
            'user_agent_browser': log.get('user-agent_browser', ''),
            'user_agent_browser_version': log.get('user-agent_browser_version', ''),
            'user_agent_os': log.get('user-agent_os', ''),
            'user_agent_os_version': log.get('user-agent_os_version', ''),
        }
        # Copy header_* keys verbatim; stripping and re-adding the prefix would
        # corrupt any key that contains "header_" more than once.
        flat_log.update({k: v for k, v in log.items() if k.startswith('header_')})
        flat_data.append(flat_log)

    df = pd.DataFrame(flat_data)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['trapped'] = df['trapped'].map(_as_bool)
    return df


# Read the raw honeytrap log and flatten it into a DataFrame.
# Forward slashes work on Windows and POSIX alike and avoid the invalid "\l"
# escape sequences of the backslash form; JSON text is decoded explicitly as
# UTF-8 instead of relying on the platform's default encoding.
with open('h0neytr4p/log/log.json', 'r', encoding='utf-8') as f:
    logs_data = f.read()

df_honeytrap = parse_honey_logs(logs_data)
df_honeytrap
    

Unnamed: 0,timestamp,src_ip,dst_port,hostname,protocol,request_method,trapped,user_agent,user_agent_browser,user_agent_browser_version,user_agent_os,user_agent_os_version,header_accept,header_accept-encoding,header_connection,header_user-agent,header_accept-charset
0,2025-01-09 20:52:39+00:00,34.38.239.34,443,35.181.167.143,https,GET,False,python-requests/2.32.3,Python Requests,2.32,Other,,*/*,"gzip, deflate",keep-alive,python-requests/2.32.3,
1,2025-01-09 21:28:18+00:00,5.181.190.248,443,35.181.167.143,https,GET,False,,Other,,Other,,,,,,
2,2025-01-09 21:34:07+00:00,185.242.226.117,443,35.181.167.143,https,GET,False,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,Chrome,88.0,Windows,10.0,*/*,gzip,,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,
3,2025-01-09 21:47:00+00:00,54.184.244.82,443,35.181.167.143,https,GET,False,Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2...,Opera,12.0,Windows,7.0,,gzip,close,Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2...,utf-8


Dionaea

In [3]:
def parse_dionaea_logs(logs: str) -> pd.DataFrame:
    """Turn newline-delimited JSON connection records into a flat DataFrame.

    Args:
        logs: Text with one JSON object per line; surrounding whitespace is
            stripped before splitting on newlines.

    Returns:
        One row per record with the top-level address/port fields plus the
        ``protocol``, ``transport`` and ``type`` entries of the nested
        ``connection`` object (the latter exposed as ``connection_type``).
        ``timestamp`` is converted with ``pd.to_datetime``.
    """
    top_level_keys = ('timestamp', 'src_ip', 'src_port', 'dst_ip', 'dst_port')

    # Parse every line first, then flatten, so parsing errors surface before key errors.
    entries = list(map(json.loads, logs.strip().split('\n')))

    records = []
    for entry in entries:
        record = {key: entry[key] for key in top_level_keys}
        record['protocol'] = entry['connection']['protocol']
        record['transport'] = entry['connection']['transport']
        record['connection_type'] = entry['connection']['type']
        records.append(record)

    frame = pd.DataFrame(records)
    frame['timestamp'] = pd.to_datetime(frame['timestamp'])
    return frame

# Read the raw Dionaea log, flatten it, and print two quick frequency summaries.
# Forward slashes work on Windows and POSIX alike and avoid the invalid "\l" and
# "\d" escape sequences of the backslash form; JSON text is decoded explicitly
# as UTF-8 instead of relying on the platform's default encoding.
with open('dionaea/log/dionaea.json', 'r', encoding='utf-8') as f:
    logs_data = f.read()

df_dionaea = parse_dionaea_logs(logs_data)

print("\nattacks by protocol")
print(df_dionaea['protocol'].value_counts())

print("\nattacks by source IP")
print(df_dionaea['src_ip'].value_counts())

df_dionaea


attacks by protocol
protocol
smbd        3161
mssqld         5
mysqld         5
httpd          4
ftpd           4
mongod         2
epmapper       2
Name: count, dtype: int64

attacks by source IP
src_ip
138.219.201.243    3149
65.188.139.20         5
46.105.132.55         3
172.105.128.12        2
45.33.109.17          2
66.240.236.116        2
60.253.15.176         1
103.203.59.5          1
13.83.42.216          1
138.94.46.193         1
172.169.205.214       1
47.251.92.56          1
179.43.191.146        1
198.235.24.130        1
52.234.238.238        1
91.223.169.83         1
47.236.99.61          1
185.247.137.129       1
64.62.197.156         1
64.62.197.86          1
205.210.31.106        1
4.151.220.177         1
216.218.206.118       1
193.200.78.250        1
161.35.129.149        1
172.169.109.108       1
Name: count, dtype: int64


Unnamed: 0,timestamp,src_ip,src_port,dst_ip,dst_port,protocol,transport,connection_type
0,2025-01-09 17:25:01.178190,60.253.15.176,45979,192.168.128.2,81,httpd,tcp,accept
1,2025-01-09 17:38:52.962253,66.240.236.116,53810,192.168.128.2,445,smbd,tcp,accept
2,2025-01-09 17:38:53.268495,66.240.236.116,53824,192.168.128.2,445,smbd,tcp,accept
3,2025-01-09 17:48:03.528461,172.105.128.12,39698,192.168.128.2,1433,mssqld,tcp,accept
4,2025-01-09 17:49:03.529130,172.105.128.12,43010,192.168.128.2,1433,mssqld,tcp,accept
...,...,...,...,...,...,...,...,...
3178,2025-01-09 22:06:29.955900,138.219.201.243,17330,192.168.128.2,445,smbd,tcp,accept
3179,2025-01-09 22:11:56.971569,172.169.205.214,55504,192.168.128.2,21,ftpd,tcp,accept
3180,2025-01-09 22:51:17.258003,138.94.46.193,3584,192.168.128.2,445,smbd,tcp,accept
3181,2025-01-09 23:10:49.285993,13.83.42.216,49256,192.168.128.2,21,ftpd,tcp,accept


In [4]:
# Dionaea connections whose destination port is 56016.
df_dionaea.loc[df_dionaea["dst_port"] == 56016]

Unnamed: 0,timestamp,src_ip,src_port,dst_ip,dst_port,protocol,transport,connection_type


Tanner

In [5]:
def parse_tanner_logs(logs: str) -> pd.DataFrame:
    """Flatten newline-delimited JSON request records into a DataFrame.

    Args:
        logs: Text holding one JSON object per line. Blank lines are ignored.

    Returns:
        One row per record with the request fields, the peer address
        (``peer.ip`` / ``peer.port`` as ``src_ip`` / ``src_port``), the
        response version, the session UUID found under
        ``response_msg.response.message.sess_uuid`` and the ``user-agent``
        request header. A missing ``headers`` object or ``user-agent`` entry
        yields an empty string. ``timestamp`` is converted with
        ``pd.to_datetime``. Empty input yields an empty frame with the same
        columns.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the other (required) keys.
    """
    columns = [
        'timestamp', 'method', 'path', 'status', 'uuid', 'src_ip', 'src_port',
        'response_version', 'response_sess_uuid', 'header_user-agent',
    ]

    # Skip blank lines so a stray empty line or an empty file does not make json.loads fail.
    json_logs = [json.loads(line) for line in logs.split('\n') if line.strip()]

    flat_data = []
    for log in json_logs:
        request_headers = log.get('headers') or {}
        flat_data.append({
            'timestamp': log['timestamp'],
            'method': log['method'],
            'path': log['path'],
            'status': log['status'],
            'uuid': log['uuid'],
            'src_ip': log['peer']['ip'],
            'src_port': log['peer']['port'],
            'response_version': log['response_msg']['version'],
            'response_sess_uuid': log['response_msg']['response']['message']['sess_uuid'],
            # Clients are free to omit the User-Agent header; do not fail on them.
            'header_user-agent': request_headers.get('user-agent', ''),
        })

    # Passing the column list keeps the schema stable even when there are no rows.
    df = pd.DataFrame(flat_data, columns=columns)
    df['timestamp'] = pd.to_datetime(df['timestamp'])

    return df


# Read the raw Tanner report and flatten it into a DataFrame.
# The original path mixed "\" and "/" separators; forward slashes work on
# Windows and POSIX alike and avoid the invalid "\l" escape sequence. JSON text
# is decoded explicitly as UTF-8 instead of relying on the platform default.
with open('tanner/log/tanner_report.json', 'r', encoding='utf-8') as f:
    logs_data = f.read()

df_tanner = parse_tanner_logs(logs_data)

df_tanner

Unnamed: 0,timestamp,method,path,status,uuid,src_ip,src_port,response_version,response_sess_uuid,header_user-agent
0,2025-01-09 17:31:15.638815,GET,/,200,53cfc2cb-d8af-4542-9c94-ab8974604ac8,45.156.129.130,33961,0.6.0,e488788a-b23f-4478-85a0-0044cbc533f0,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...
1,2025-01-09 17:31:16.402025,GET,/,200,53cfc2cb-d8af-4542-9c94-ab8974604ac8,118.33.169.205,61200,0.6.0,56749d63-bb40-4d9c-a15f-3380a9fe6188,curl/7.88.1
2,2025-01-09 17:49:52.016631,GET,/,200,53cfc2cb-d8af-4542-9c94-ab8974604ac8,193.200.78.250,51216,0.6.0,fd8a5344-eeb4-464e-9430-67648b7a082e,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...
3,2025-01-09 17:55:06.422076,GET,/,200,53cfc2cb-d8af-4542-9c94-ab8974604ac8,87.121.79.3,50162,0.6.0,b20c9658-69da-468d-81c3-031f6182ff88,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...
4,2025-01-09 18:01:24.536264,GET,/,200,53cfc2cb-d8af-4542-9c94-ab8974604ac8,69.164.217.74,53842,0.6.0,750d2021-463f-4a26-a8af-3a47a624fad4,Mozilla/5.0 zgrab/0.x
5,2025-01-09 18:16:12.619239,GET,/,200,53cfc2cb-d8af-4542-9c94-ab8974604ac8,5.181.190.248,39022,0.6.0,9f9e7a5c-6877-4b8c-8788-d786afa9ce18,
6,2025-01-09 18:34:37.270527,GET,/,200,53cfc2cb-d8af-4542-9c94-ab8974604ac8,45.156.128.47,49099,0.6.0,c27994b4-86b0-4cfa-84fe-7ce2336824fc,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...
7,2025-01-09 18:52:02.461874,GET,/,200,53cfc2cb-d8af-4542-9c94-ab8974604ac8,213.32.122.81,33103,0.6.0,43628286-5108-4f10-ace2-2287a9e1c8fe,Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.3...
8,2025-01-09 19:01:14.508766,HEAD,/Core/Skin/Login.aspx,200,53cfc2cb-d8af-4542-9c94-ab8974604ac8,124.156.187.113,37836,0.6.0,cacba44b-4f08-402f-accc-6de5da0abe80,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...
9,2025-01-09 19:13:11.869482,GET,/,200,53cfc2cb-d8af-4542-9c94-ab8974604ac8,193.200.78.250,45228,0.6.0,3b22d575-63e8-4aaf-92a4-073558e8d31d,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...


Heralding

In [6]:
def parse_heralding_logs(logs: str) -> pd.DataFrame:
    """Turn newline-delimited JSON session records into a flat DataFrame.

    Args:
        logs: Text with one JSON object per line; surrounding whitespace is
            stripped before splitting on newlines.

    Returns:
        One row per record. The ``source_*`` / ``destination_*`` keys are
        renamed to ``src_*`` / ``dst_*``; ``timestamp`` is converted with
        ``pd.to_datetime``.
    """
    # Output column -> key in the JSON record, in output column order.
    column_sources = {
        'timestamp': 'timestamp',
        'duration': 'duration',
        'session_id': 'session_id',
        'src_ip': 'source_ip',
        'src_port': 'source_port',
        'dst_ip': 'destination_ip',
        'dst_port': 'destination_port',
        'protocol': 'protocol',
    }

    sessions = list(map(json.loads, logs.strip().split('\n')))
    rows = [
        {column: session[key] for column, key in column_sources.items()}
        for session in sessions
    ]

    frame = pd.DataFrame(rows)
    frame['timestamp'] = pd.to_datetime(frame['timestamp'])
    return frame


# Read the raw Heralding session log and flatten it into a DataFrame.
# JSON text is decoded explicitly as UTF-8; without the argument open() uses the
# platform's locale encoding, which differs between machines.
with open('heralding/log/log_session.json', 'r', encoding='utf-8') as f:
    logs_data = f.read()

df_heralding = parse_heralding_logs(logs_data)

df_heralding

Unnamed: 0,timestamp,duration,session_id,src_ip,src_port,dst_ip,dst_port,protocol
0,2025-01-09 17:26:09.579981,1,ea2e1eb0-5a6f-4107-80a5-d889b0515f50,167.94.138.39,58306,35.181.167.143,993,imaps
1,2025-01-09 17:26:16.229425,1,a170c4ab-4efb-4550-9eb1-0575916d022b,167.94.138.39,38208,35.181.167.143,993,imaps
2,2025-01-09 17:49:11.967062,0,a9549fcc-04c8-43d3-99b5-c9a3d19692e6,65.49.20.67,60422,35.181.167.143,993,imaps
3,2025-01-09 18:03:12.854654,0,aeff5bcf-4135-4757-bfdd-74f64baafd01,66.240.223.202,35128,35.181.167.143,5900,vnc
4,2025-01-09 18:28:52.852807,0,19ab7dc4-ac52-4cfd-a84b-f193c8840612,205.210.31.93,55898,35.181.167.143,143,imap
5,2025-01-09 18:43:47.525800,1,a6a267a0-9922-4b69-8380-248bc65453e8,64.62.197.182,51446,35.181.167.143,5900,vnc
6,2025-01-09 18:46:21.953237,0,7915468e-2b96-48c9-97ed-63a83e77bc79,198.235.24.138,60380,35.181.167.143,993,imaps
7,2025-01-09 18:58:23.180674,0,61e3031e-2ac8-47b5-825f-62952432761a,45.79.191.233,61000,35.181.167.143,143,imap
8,2025-01-09 19:05:36.570173,1,4bc30c57-1bd8-405d-aad7-53a858011705,71.6.167.142,59258,35.181.167.143,110,pop3
9,2025-01-09 19:10:14.362897,0,a4edacb3-756e-44c5-a0b1-1b610b15dabc,45.79.138.166,61000,35.181.167.143,110,pop3


Suricata

In [7]:
def parse_suricata_logs(logs: str) -> pd.DataFrame:
    """Turn newline-delimited JSON event records into a flat DataFrame.

    Args:
        logs: Text with one JSON object per line; surrounding whitespace is
            stripped before splitting on newlines.

    Returns:
        One row per event. Top-level fields are read with ``dict.get`` so a
        missing key yields a missing value rather than an error. When an event
        has an ``alert`` and/or ``flow`` object, selected entries of it are
        added as ``alert_*`` / ``flow_*`` columns. ``timestamp`` and
        ``flow_start`` are converted with ``pd.to_datetime`` when present.
    """
    # Output column -> key in the JSON event, in output column order.
    top_level = {
        'timestamp': 'timestamp',
        'flow_id': 'flow_id',
        'event_type': 'event_type',
        'src_ip': 'src_ip',
        'src_port': 'src_port',
        'dst_ip': 'dest_ip',
        'dst_port': 'dest_port',
        'protocol': 'proto',
    }
    # Nested object name -> entries copied out as "<object>_<entry>" columns.
    nested = (
        ('alert', ('action', 'signature', 'category', 'severity')),
        ('flow', ('pkts_toserver', 'pkts_toclient', 'bytes_toserver', 'bytes_toclient', 'start')),
    )

    events = list(map(json.loads, logs.strip().split('\n')))

    rows = []
    for event in events:
        row = {column: event.get(key) for column, key in top_level.items()}
        for section, entries in nested:
            if section not in event:
                continue
            for entry in entries:
                row[f'{section}_{entry}'] = event[section].get(entry)
        rows.append(row)

    frame = pd.DataFrame(rows)

    for time_column in ('timestamp', 'flow_start'):
        if time_column in frame.columns:
            frame[time_column] = pd.to_datetime(frame[time_column])

    return frame


# Read the raw Suricata event log, flatten it, and print the event count per transport protocol.
# JSON text is decoded explicitly as UTF-8; without the argument open() uses the
# platform's locale encoding, which differs between machines.
with open('suricata/log/eve.json', 'r', encoding='utf-8') as f:
    logs = f.read()

df_suricata = parse_suricata_logs(logs)
print("\nattacks by protocol")
print(df_suricata['protocol'].value_counts())
df_suricata


attacks by protocol
protocol
TCP     37728
UDP      6757
ICMP        7
Name: count, dtype: int64


Unnamed: 0,timestamp,flow_id,event_type,src_ip,src_port,dst_ip,dst_port,protocol,alert_action,alert_signature,alert_category,alert_severity,flow_pkts_toserver,flow_pkts_toclient,flow_bytes_toserver,flow_bytes_toclient,flow_start
0,2025-01-09 17:22:18.477394+00:00,595589264802315,tls,172.31.7.94,43648,142.250.75.251,443,TCP,,,,,,,,,NaT
1,2025-01-09 17:22:19.173123+00:00,595589264802315,alert,142.250.75.251,443,172.31.7.94,43648,TCP,allowed,SURICATA STREAM reassembly depth reached,Generic Protocol Command Decode,3.0,147.0,762.0,11280.0,1099167.0,2025-01-09 17:22:18.466351+00:00
2,2025-01-09 17:22:23.872121+00:00,2042653232732981,alert,220.87.110.51,9275,172.31.7.94,80,TCP,allowed,SURICATA STREAM RST recv but no session,Generic Protocol Command Decode,3.0,1.0,1.0,58.0,60.0,2025-01-09 17:22:23.606664+00:00
3,2025-01-09 17:22:24.627768+00:00,162968779420388,alert,194.180.49.70,48927,172.31.7.94,3659,TCP,allowed,ET DROP Dshield Block Listed Source group 1,Misc Attack,2.0,1.0,0.0,60.0,0.0,2025-01-09 17:22:24.627768+00:00
4,2025-01-09 17:22:24.627768+00:00,162968779420388,alert,194.180.49.70,48927,172.31.7.94,3659,TCP,allowed,ET SCAN NMAP -sS window 1024,Attempted Information Leak,2.0,1.0,0.0,60.0,0.0,2025-01-09 17:22:24.627768+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44487,2025-01-09 23:12:04.589687+00:00,964899000692900,alert,52.189.75.157,56016,172.31.7.94,2455,TCP,allowed,ET SCAN Zmap User-Agent (Inbound),Detection of a Network Scan,3.0,4.0,4.0,387.0,273.0,2025-01-09 23:12:03.355730+00:00
44488,2025-01-09 23:12:07.006497+00:00,411144369347423,flow,172.31.7.94,45321,185.125.190.57,123,UDP,,,,,1.0,1.0,90.0,90.0,2025-01-09 23:07:05.947695+00:00
44489,2025-01-09 23:12:09.003494+00:00,288444297200982,flow,13.83.42.216,49256,172.31.7.94,21,TCP,,,,,5.0,3.0,338.0,229.0,2025-01-09 23:10:49.132694+00:00
44490,2025-01-09 23:12:09.991285+00:00,1581706931298761,flow,172.31.7.94,34882,169.254.169.123,123,UDP,,,,,1.0,1.0,90.0,90.0,2025-01-09 23:07:01.237197+00:00


In [8]:
# Suricata events whose source port is 56016.
df_suricata.loc[df_suricata["src_port"] == 56016]

Unnamed: 0,timestamp,flow_id,event_type,src_ip,src_port,dst_ip,dst_port,protocol,alert_action,alert_signature,alert_category,alert_severity,flow_pkts_toserver,flow_pkts_toclient,flow_bytes_toserver,flow_bytes_toclient,flow_start
44487,2025-01-09 23:12:04.589687+00:00,964899000692900,alert,52.189.75.157,56016,172.31.7.94,2455,TCP,allowed,ET SCAN Zmap User-Agent (Inbound),Detection of a Network Scan,3.0,4.0,4.0,387.0,273.0,2025-01-09 23:12:03.355730+00:00


In [9]:
# Number of Suricata events per source IP, most frequent first.
df_suricata.loc[:, 'src_ip'].value_counts()

src_ip
172.31.7.94        12274
15.236.122.110     10452
138.219.201.243     9432
65.188.139.20       1585
86.245.60.17         693
                   ...  
87.236.176.222         1
178.32.72.209          1
47.237.98.120          1
147.185.133.48         1
167.94.138.106         1
Name: count, Length: 2611, dtype: int64

voir les données de tpot: attacker ip reputation, géoloc, suricata  
nettoyage bdd en enlevant les ip des IA qui crawlent internet  
classifier les types d'attaques robot, shodan, attaquant humain  
arriver à savoir si des attaquants ont déjà été identifiés sur d'autres attaques / scénarios  
https://www.shodan.io/  
serenicity  
digitalocean  
https://cve.mitre.org/index.html et https://github.com/CVEProject   
proposition de Vincent Genot  
* DNS des CHU avec enregistrements  
* regarder ogo security  
* regarder https://attack.mitre.org/  
* regarder darktrace  

In [10]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [11]:
"""headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"}
r=requests.get("https://viewdns.info/iplocation/?ip=5.181.190.248", headers=headers)
attributes_list = []
values_list = []
if(r.status_code == 200):
    soup = BeautifulSoup(r.content, 'html.parser')
    s = soup.find('tbody', class_='bg-white dark:bg-gray-800 divide-y divide-gray-200 dark:divide-gray-700')
    attributes = s.find_all('td', class_='px-6 py-4 whitespace-nowrap text-base font-medium text-gray-900 dark:text-gray-100')
    values = s.find_all('td', class_='px-6 py-4 whitespace-nowrap text-base text-gray-500 dark:text-gray-400')
    for attribute, value in zip(attributes, values):
        attributes_list.append(attribute.text)
        values_list.append(value.text)
        print(attribute.text,": ",value.text)
print(attributes_list)
print(values_list)"""

'headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"}\nr=requests.get("https://viewdns.info/iplocation/?ip=5.181.190.248", headers=headers)\nattributes_list = []\nvalues_list = []\nif(r.status_code == 200):\n    soup = BeautifulSoup(r.content, \'html.parser\')\n    s = soup.find(\'tbody\', class_=\'bg-white dark:bg-gray-800 divide-y divide-gray-200 dark:divide-gray-700\')\n    attributes = s.find_all(\'td\', class_=\'px-6 py-4 whitespace-nowrap text-base font-medium text-gray-900 dark:text-gray-100\')\n    values = s.find_all(\'td\', class_=\'px-6 py-4 whitespace-nowrap text-base text-gray-500 dark:text-gray-400\')\n    for attribute, value in zip(attributes, values):\n        attributes_list.append(attribute.text)\n        values_list.append(value.text)\n        print(attribute.text,": ",value.text)\nprint(attributes_list)\nprint(values_list)'

In [12]:
# Column layout of the geolocation table: three bookkeeping columns followed by
# the location attributes. The table starts empty.
attributes_list = [
    'Honeypot', 'IP', 'Appearances',
    'City', 'Zip Code',
    'Region Code', 'Region Name',
    'Country Code', 'Country Name',
    'Latitude', 'Longitude',
]
ip_loc = pd.DataFrame(columns=attributes_list)
ip_loc

Unnamed: 0,Honeypot,IP,Appearances,City,Zip Code,Region Code,Region Name,Country Code,Country Name,Latitude,Longitude


In [13]:
def create_dataframe(honeypot, dataframe, timeout=10):
    """Geolocate each distinct source IP of ``dataframe`` and append it to ``ip_loc``.

    For every distinct, non-missing value of ``dataframe['src_ip']`` (in order
    of first appearance) the viewdns.info IP-location page is fetched and the
    text of its value cells is scraped. One row is appended to the
    module-level ``ip_loc`` table: the honeypot name, the IP, the number of
    rows of ``dataframe`` with that IP, then the scraped values.

    A lookup that fails (network error, timeout, non-200 status, missing
    result table, or an unexpected number of cells) still produces a row; its
    location columns are filled with ``None`` so the row always matches the
    width of ``ip_loc`` and one bad lookup does not abort the whole run.

    Args:
        honeypot: Label stored in the first column of every appended row.
        dataframe: Frame with a ``src_ip`` column.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. ``ip_loc`` is modified in place.
    """
    # Loop-invariant request/parsing settings, hoisted out of the per-IP loop.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"}
    table_class = 'bg-white dark:bg-gray-800 divide-y divide-gray-200 dark:divide-gray-700'
    value_class = 'px-6 py-4 whitespace-nowrap text-base text-gray-500 dark:text-gray-400'
    # Every column after Honeypot / IP / Appearances is filled from the scraped page.
    n_location_columns = len(ip_loc.columns) - 3

    src_ips = dataframe['src_ip'].dropna()
    # Count all IPs in one pass instead of re-filtering the frame for each IP.
    appearances = src_ips.value_counts()

    for ip in src_ips.unique():
        location_values = []
        try:
            r = requests.get(f"https://viewdns.info/iplocation/?ip={ip}", headers=headers, timeout=timeout)
        except requests.RequestException:
            r = None

        if r is not None and r.status_code == 200:
            soup = BeautifulSoup(r.content, 'html.parser')
            s = soup.find('tbody', class_=table_class)
            if s is not None:
                location_values = [value.text for value in s.find_all('td', class_=value_class)]

        if len(location_values) != n_location_columns:
            # Keep the row aligned with ip_loc's columns when the lookup failed.
            location_values = [None] * n_location_columns

        ip_loc.loc[len(ip_loc)] = [honeypot, ip, int(appearances.loc[ip])] + location_values

In [14]:
"""create_dataframe('honeytrap',df_honeytrap)
ip_loc"""

"create_dataframe('honeytrap',df_honeytrap)\nip_loc"

In [15]:
"""create_dataframe('dionaea',df_dionaea)
ip_loc"""

"create_dataframe('dionaea',df_dionaea)\nip_loc"

In [16]:
"""create_dataframe('tanner',df_tanner)
create_dataframe('heralding',df_heralding)"""

"create_dataframe('tanner',df_tanner)\ncreate_dataframe('heralding',df_heralding)"

In [17]:
"""create_dataframe('suricata',df_suricata)"""

"create_dataframe('suricata',df_suricata)"

In [18]:
"""unique_ip=df_suricata['src_ip'].unique()
unique_ip"""

"unique_ip=df_suricata['src_ip'].unique()\nunique_ip"

In [19]:
#ip_loc

In [20]:
"""ip_loc['Latitude']=pd.to_numeric(ip_loc['Latitude'])
ip_loc['Longitude']=pd.to_numeric(ip_loc['Longitude'])"""

"ip_loc['Latitude']=pd.to_numeric(ip_loc['Latitude'])\nip_loc['Longitude']=pd.to_numeric(ip_loc['Longitude'])"

In [21]:
#ip_loc.dtypes

In [22]:
# Load the per-IP geolocation table from disk.
# NOTE(review): presumably ip_loc.csv is a saved copy of the ip_loc table filled by
# create_dataframe (the displayed columns match); no to_csv call is visible in this
# notebook, so confirm where the file comes from.
data_ip = pd.read_csv("ip_loc.csv")
data_ip

Unnamed: 0,Honeypot,IP,Appearances,City,Zip Code,Region Code,Region Name,Country Code,Country Name,Latitude,Longitude
0,honeytrap,34.38.239.34,1,Brussels,1000,BRU,Brussels Capital,BE,Belgium,50.8534,4.3470
1,honeytrap,5.181.190.248,1,,0,,,PL,Poland,52.2394,21.0362
2,honeytrap,185.242.226.117,1,,0,,,US,United States,37.7510,-97.8220
3,honeytrap,54.184.244.82,1,Boardman,97818,OR,Oregon,US,United States,45.8401,-119.7050
4,dionaea,60.253.15.176,1,Gumi,392,47,Gyeongsangbuk-do,KR,South Korea,36.1135,128.3430
...,...,...,...,...,...,...,...,...,...,...,...
2674,suricata,183.248.219.168,1,,0,,,CN,China,34.7732,113.7220
2675,suricata,162.216.150.241,1,North Charleston,29415,SC,South Carolina,US,United States,32.8608,-79.9746
2676,suricata,174.83.22.126,2,Webster,1570,MA,Massachusetts,US,United States,42.0491,-71.8944
2677,suricata,45.79.172.21,1,Cedar Knolls,7927,NJ,New Jersey,US,United States,40.8229,-74.4592


In [25]:
# Distinct values of the Appearances column.
# NOTE(review): presumably inspected to choose the bin edges used for 'echelle' — confirm.
data_ip["Appearances"].unique()

array([    1,     2,  3149,     3,     5, 12274,    76,    57,    23,
           4,   184,    26,    50,    33,     8,     6,   693,    20,
          10,    65,     7, 10452,    48,    72,    14,    34,    35,
          12,     9,    42,    96,    13,    74,    49,    31,    15,
          45,    41,    16,    11,   211,    27,    25,    59,    18,
          21,    47,    17,  9432,    30,    24,  1585,   312,    69,
          71,    39,   157], dtype=int64)

In [29]:
# Creates an all-missing 'echelle' column (an empty Series is aligned to the index).
# The pd.cut cell that follows assigns the same column again, so this is only a placeholder.
data_ip['echelle'] = pd.Series(dtype='int')

In [38]:
# Bucket the appearance counts into size classes 1..8 for plotting.
# right=False makes every bin half-open on the right: [0, 5), [5, 10), ..., [5000, inf).
edges = [0, 5, 10, 50, 100, 500, 1000, 5000, float('inf')]
labels = list(range(1, len(edges)))  # one label per bin: 1..8
data_ip['echelle'] = pd.cut(
    data_ip['Appearances'],
    bins=edges,
    labels=labels,
    right=False,
)

In [39]:
data_ip

Unnamed: 0,Honeypot,IP,Appearances,City,Zip Code,Region Code,Region Name,Country Code,Country Name,Latitude,Longitude,echelle
0,honeytrap,34.38.239.34,1,Brussels,1000,BRU,Brussels Capital,BE,Belgium,50.8534,4.3470,1
1,honeytrap,5.181.190.248,1,,0,,,PL,Poland,52.2394,21.0362,1
2,honeytrap,185.242.226.117,1,,0,,,US,United States,37.7510,-97.8220,1
3,honeytrap,54.184.244.82,1,Boardman,97818,OR,Oregon,US,United States,45.8401,-119.7050,1
4,dionaea,60.253.15.176,1,Gumi,392,47,Gyeongsangbuk-do,KR,South Korea,36.1135,128.3430,1
...,...,...,...,...,...,...,...,...,...,...,...,...
2674,suricata,183.248.219.168,1,,0,,,CN,China,34.7732,113.7220,1
2675,suricata,162.216.150.241,1,North Charleston,29415,SC,South Carolina,US,United States,32.8608,-79.9746,1
2676,suricata,174.83.22.126,2,Webster,1570,MA,Massachusetts,US,United States,42.0491,-71.8944,1
2677,suricata,45.79.172.21,1,Cedar Knolls,7927,NJ,New Jersey,US,United States,40.8229,-74.4592,1


In [34]:
import geopandas
import numpy as np
from shapely.geometry import Point
from plotly import express as px
import warnings
warnings.filterwarnings("ignore")

In [40]:
# Map of the first 68 rows of data_ip: one marker per IP, coloured by honeypot
# and sized by the 'echelle' class.
# NOTE(review): 68 is presumably the boundary between the honeypot rows and the
# Suricata rows of ip_loc.csv — confirm against the file.
world = px.scatter_mapbox(
    data_ip.iloc[:68],
    lat="Latitude",
    lon="Longitude",
    color="Honeypot",
    size="echelle",
    zoom=2,
    color_continuous_scale=px.colors.cyclical.IceFire,
)
world.update_layout(
    mapbox_style="carto-positron",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
world.show()

In [43]:
# Map of the rows of data_ip from position 68 onwards: one marker per IP,
# coloured and sized by the 'echelle' class.
# NOTE(review): presumably these are the Suricata rows of ip_loc.csv — confirm.
world = px.scatter_mapbox(
    data_ip.iloc[68:],
    lat="Latitude",
    lon="Longitude",
    color="echelle",
    size="echelle",
    zoom=2,
    color_continuous_scale=px.colors.cyclical.IceFire,
)
world.update_layout(
    mapbox_style="carto-positron",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
world.show()