In [None]:
from pathlib import Path
import os
# Make sure the working directory is the 'Cloud-Telescope' directory; if the
# notebook was started elsewhere, step up one level (presumably from a
# sub-folder of the repository -- TODO confirm the expected layout).
# The last path component is compared instead of a '/'-prefixed string suffix so
# that the check does not depend on the platform's path separator.
if Path.cwd().name != 'Cloud-Telescope':
    os.chdir(Path.cwd().parent)
    print(f'New working dir is {Path.cwd()}')

try:
    import dns
except ModuleNotFoundError:
    import sys
    !{sys.executable} -m pip install dnspython
    import dns
    
    
import importlib
import pandas as pd

import src.d01_data.read_files
importlib.reload(src.d01_data.read_files)

import src.d01_data.save_files
importlib.reload(src.d01_data.save_files)

import src.d01_data.log_names_enum
importlib.reload(src.d01_data.log_names_enum)

import src.d03_visiualisation.tabel_visiualisation
importlib.reload(src.d03_visiualisation.tabel_visiualisation)

import src.d03_visiualisation.bar_diagram
importlib.reload(src.d03_visiualisation.bar_diagram)

import src.d03_visiualisation.head_map
importlib.reload(src.d03_visiualisation.head_map)

import src.d03_visiualisation.graf_arrival_rate
importlib.reload(src.d03_visiualisation.graf_arrival_rate)

import src.d03_visiualisation.seaborn_bar_diagram
importlib.reload(src.d03_visiualisation.seaborn_bar_diagram)

import src.d04_analysation.count_percent
importlib.reload(src.d04_analysation.count_percent)

import src.d02_intermediate.filter_telescope_ips
importlib.reload(src.d02_intermediate.filter_telescope_ips)

import src.d04_analysation.backscatter
importlib.reload(src.d04_analysation.backscatter)

import src.d04_analysation.icmp_backscatter_analysis
importlib.reload(src.d04_analysation.icmp_backscatter_analysis)

import src.d04_analysation.reverse_dns_lookup
importlib.reload(src.d04_analysation.reverse_dns_lookup)

import src.config
importlib.reload(src.config)

### load conn

In [None]:
from src.d01_data.read_files import load_latest_dataframe
from src.d01_data.log_names_enum import LogNamesEnum

def _to_epoch_seconds(column: pd.Series, errors: str = 'raise') -> pd.Series:
    """Convert a date-like column to float64 Unix timestamps in seconds.

    Args:
        column: Values understood by ``pd.to_datetime``.
        errors: Passed through to ``pd.to_datetime`` ('raise' or 'coerce').

    Returns:
        A float64 Series; entries that parse to ``NaT`` become ``NaN``.
    """
    parsed = pd.to_datetime(column, errors=errors)
    seconds = (parsed.astype('int64') / 10**9).astype('float64')
    # NaT is represented by the minimum int64, so a plain integer cast would
    # yield a bogus timestamp of roughly -9.2e9 seconds. Mask those entries.
    return seconds.where(parsed.notna())


conn: pd.DataFrame = load_latest_dataframe(LogNamesEnum.conn)

# Columns that must hold epoch seconds, with the parsing mode used for each.
# Only 'deletion_resp' is parsed leniently ('coerce'), as before.
for _column, _errors in (
    ('creation_resp', 'raise'),
    ('deletion_resp', 'coerce'),
    ('deletion_orig', 'raise'),
    ('creation_orig', 'raise'),
):
    # Columns that are already float64 are left untouched.
    if conn[_column].dtypes != 'float64':
        conn[_column] = _to_epoch_seconds(conn[_column], errors=_errors)

# UDP-only view of the connection log.
conn_udp = conn[conn['proto'] == 'udp']

### calculate backscatter traffic

In [None]:
from src.d02_intermediate.filter_telescope_ips import *
from src.d04_analysation.count_percent import *

def calculate_backscatter_traffic():
    """Return the backscatter subset of the UDP connections in ``conn_udp``.

    Thin wrapper: the filtering itself is done by ``filter_backscatter_traffic``
    for the ``conn`` log type, and its result is returned unchanged.
    """
    return filter_backscatter_traffic(LogNamesEnum.conn, conn_udp)

# All backscatter connections plus the per-responder-IP result table produced
# by column_calculation.
backscatter_traffic: pd.DataFrame = calculate_backscatter_traffic()
backscatter_calc: pd.DataFrame = column_calculation(
    backscatter_traffic, {'id.resp_h': -1}, False
)[0].result_as_df

# Keep only the connections whose responder IP appears in that result table.
backscatter_traffic = backscatter_traffic.loc[
    lambda frame: frame['id.resp_h'].isin(backscatter_calc['id.resp_h'].tolist())
]

# First connection per responder IP.
backscatter_traffic_unique_attack_resp_ip: pd.DataFrame = backscatter_traffic.drop_duplicates(
    subset=['id.resp_h']
)

# First connection per value of the dated responder-name column.
backscatter_traffic_unique_attack: pd.DataFrame = backscatter_traffic.drop_duplicates(
    subset=['resp_name_2025-01-27']
)

### print backscatter resp IP for CAIDA script

In [None]:
def print_backscatter_resp_ip():
    """Print the responder-IP count and a ``src host ... and udp`` filter string.

    The IPs are taken from the ``id.resp_h`` column of
    ``backscatter_traffic_unique_attack``. The printed filter has the form
    ``src host A or src host B ... and udp``.
    """
    attacked_ips: list[str] = backscatter_traffic_unique_attack['id.resp_h'].to_list()
    print(len(attacked_ips))

    host_clauses = ' or '.join(f'src host {address}' for address in attacked_ips)
    print(f'{host_clauses} and udp')

def print_backscatter_ip():
    """Print the responder-IP count and a ``host A or B or ...`` filter string.

    The IPs are taken from the ``id.resp_h`` column of
    ``backscatter_traffic_unique_attack``. An empty line is printed when there
    are no IPs.
    """
    attacked_ips: list[str] = backscatter_traffic_unique_attack['id.resp_h'].to_list()
    print(len(attacked_ips))

    if attacked_ips:
        alternatives = ' or '.join(f'{address}' for address in attacked_ips)
        print(f'host {alternatives}')
    else:
        print('')
print_backscatter_resp_ip()
#print_backscatter_ip()

### show attacked ports 

In [None]:
from src.d03_visiualisation.bar_diagram import *
from src.d04_analysation.count_percent import *
from src.d03_visiualisation.seaborn_bar_diagram import *

# Lookup table from UDP port number to a display name for the service.
# show_attacked_ports() uses it to label the bars of the attacked-ports chart;
# ports missing from this table get no label there.
# NOTE(review): the port-to-service assignments were not verified against an
# authoritative registry -- confirm the less common entries before publishing.
udp_services = {
    53: "DNS",
    5060: "SIP",
    123: "NTP",
    5683: "CoAP",
    3702: "WS-Discovery",
    161: "SNMP",
    37020: "WS-Discovery (Secure)",
    389: "LDAP",
    1900: "SSDP",
    33434: "Traceroute",
    1194: "OpenVPN",
    3283: "Apple Remote Desktop",
    3478: "STUN",
    9034: "Tor ORPort",
    500: "ISAKMP (IPsec)",
    137: "NetBIOS",
    27015: "Source Engine",
    111: "ONC RPC",
    27960: "Quake 3",
    19: "Chargen",
    5353: "mDNS",
    5355: "LLMNR",
    4789: "VXLAN",
    3544: "Teredo",
    514: "Syslog",
    20000: "DNP3",
    5072: "ayiya",
    443: "QUIC",
    6081: "Geneve"
}

def show_attacked_ports():
    """Plot the per-port result for the unique attacked responder IPs.

    Runs ``column_calculation`` on ``id.resp_p`` of
    ``backscatter_traffic_unique_attack_resp_ip``, labels each port with its
    name from ``udp_services`` and draws a bar chart of ``total`` per service
    (saved under the name 'ddos_ports2').

    Returns:
        The plotted DataFrame, including the added ``service`` column.
    """
    attack_ports_calculation: pd.DataFrame = column_calculation(
        backscatter_traffic_unique_attack_resp_ip, {'id.resp_p': -1}, False
    )[0].result_as_df

    # Label all ports in a single column assignment. Unlike a positional
    # `.loc[i, 'service'] = ...` loop, this does not rely on the frame having a
    # 0..n-1 index (where a mismatch would silently append rows) and avoids
    # dtype upcasting when the first port has no known service.
    attack_ports_calculation['service'] = attack_ports_calculation['id.resp_p'].map(
        lambda port: udp_services.get(int(port))
    )

    # The first result row is excluded from the chart (presumably a summary
    # row produced by column_calculation -- TODO confirm).
    attack_ports_calculation = attack_ports_calculation.drop(index=attack_ports_calculation.index[0])

    seaborn_custom_bar_diagram(
        df=attack_ports_calculation,
        x_column='service',
        y_column='total',
        x_lable='Protokoll',
        y_lable='Anzahl Angriffe',
        rotation=40,
        saving_name='ddos_ports2',
        diagram_width=5,
        diagram_height=4,
    )

    return attack_ports_calculation
    
show_attacked_ports()


### show attacked countries

In [None]:
from src.d03_visiualisation.bar_diagram import *
from src.d04_analysation.count_percent import *

def show_attacked_countries():
    """Plot the responder-country result for the unique attacked responder IPs.

    Runs ``column_calculation`` on ``resp_country_location``, skips the first
    result row, keeps the following 20 rows and draws them as a bar chart
    (saved under the name 'ddos_country').

    Returns:
        The DataFrame that was plotted.
    """
    country_totals: pd.DataFrame = column_calculation(
        backscatter_traffic_unique_attack_resp_ip, {'resp_country_location': -1}, False
    )[0].result_as_df

    # Drop the leading row, then limit the chart to 20 entries.
    country_totals = country_totals.drop(index=country_totals.index[0]).head(20)

    seaborn_custom_bar_diagram(
        df=country_totals,
        x_column='resp_country_location',
        y_column='total',
        x_lable='Ziel-Land des Angriffs',
        y_lable='Anzahl Angriffe',
        rotation=40,
        saving_name='ddos_country',
        diagram_width=10,
        diagram_height=5,
    )

    return country_totals
    
# Use a descriptive name for the result: the previous name `re` shadowed the
# standard-library regular-expression module for the rest of the notebook.
attacked_countries = show_attacked_countries()
attacked_countries

### Show attack destination (country / ASN) mapped to source (country / ASN)

In [None]:
from src.d03_visiualisation.head_map import *
from src.d04_analysation.backscatter import *

def show_attacked_mapped_countries():
    """Render heat maps relating originator and responder countries.

    Restricts the unique attacked responder IPs to the countries in the first
    21 rows of the ``resp_country_location`` result table, builds two tables
    with ``calculate_mapped_asn_attacked_ocean`` (one per axis orientation) and
    draws a heat map for each. The second heat map is saved under the name
    '____ddos_comparison'.

    Returns:
        The second table (``resp_country_location`` as compare column x).
    """
    top_attacked_countries = column_calculation(
        backscatter_traffic_unique_attack_resp_ip, {'resp_country_location': -1}, False
    )[0].result_as_df.head(21)
    backscatter_traffic_unique_attack_top = backscatter_traffic_unique_attack_resp_ip[
        backscatter_traffic_unique_attack_resp_ip['resp_country_location'].isin(
            top_attacked_countries['resp_country_location']
        )
    ]

    # countries
    df_mapped_countries: pd.DataFrame = calculate_mapped_asn_attacked_ocean(
        backscatter_unique=backscatter_traffic_unique_attack_top,
        compare_column_x='orig_country_location',
        compare_column_y='resp_country_location',
        threshold_percent=1.0,
        sort=False,
    )
    show_head_int_map_for_df(
        df=df_mapped_countries,
        y_index='resp_country_location',
        x_columns='orig_country_location',
        value_column='total',
        x_lable='orig',
        y_lable='resp',
    )

    df_mapped_countries2: pd.DataFrame = calculate_mapped_asn_attacked_ocean(
        backscatter_unique=backscatter_traffic_unique_attack_top,
        compare_column_x='resp_country_location',
        compare_column_y='orig_country_location',
        threshold_percent=1.0,
        sort=False,
    )
    # This call used to sit behind an early `return` and was never executed;
    # the return now follows it so the labelled heat map is drawn and saved.
    show_head_int_map_for_df(
        df=df_mapped_countries2,
        y_index='orig_country_location',
        x_columns='resp_country_location',
        value_column='total',
        x_lable='Zielland des Angriffes',
        y_lable='Serverstandort (src-IP)',
        save_name='____ddos_comparison',
    )

    return df_mapped_countries2

    # Disabled ASN / RIR variants, kept for reference:
    # asn
    # df_mapped_asn: pd.DataFrame =calculate_mapped_asn_attacked_ocean(backscatter_unique=backscatter_traffic_unique_attack, compare_column_x='orig_id_rir', compare_column_y='resp_id_rir', threshold_percent=0.8, sort=True)
    # 
    # show_head_map_for_df(df=df_mapped_asn, y_index='resp_id_rir', x_columns='orig_id_rir', value_column='total', x_lable='orig', y_lable='resp').show()
    #rir = backscatter_traffic_unique_attack
    #rir['resp_admin_rir'] = backscatter_traffic_unique_attack['resp_admin_rir'].apply(lambda x: x.split('-')[-1])
    #rir['orig_admin_rir'] = backscatter_traffic_unique_attack['orig_admin_rir'].apply(lambda x: x.split('-')[-1])
    #df_mapped_network: pd.DataFrame = calculate_mapped_asn_attacked_ocean(backscatter_unique=rir, compare_column_x='resp_admin_rir', compare_column_y='orig_admin_rir', threshold_percent=1.4, sort=False)
    
    #show_head_int_map_for_df(df=df_mapped_network, y_index='orig_admin_rir', x_columns='resp_admin_rir', value_column='total', x_lable='orig', y_lable='resp', save_name='ddos_comparison')
    
res = show_attacked_mapped_countries()

### show attacked Server Type 

In [None]:
from src.d03_visiualisation.bar_diagram import *
from src.d04_analysation.count_percent import *

def show_attacked_server_type():
    """Plot the ``resp_as_type_rir`` result for all backscatter connections.

    Runs ``column_calculation`` on ``backscatter_traffic``, skips the first
    result row and shows the remaining rows as a basic bar chart.

    Returns:
        The DataFrame that was plotted.
    """
    server_type_totals: pd.DataFrame = column_calculation(
        backscatter_traffic, {'resp_as_type_rir': -1}, False
    )[0].result_as_df
    server_type_totals = server_type_totals.drop(index=server_type_totals.index[0])

    diagram = basic_bar_diagram(
        df=server_type_totals,
        x_column='resp_as_type_rir',
        y_column='total',
        x_lable='Type',
        y_lable='Anzahl DOS/DDOS Angriffe',
    )
    diagram.show()

    return server_type_totals
    
show_attacked_server_type()    

### Calculate dns and rir name

In [None]:
from src.d04_analysation.backscatter import *
from src.d04_analysation.reverse_dns_lookup import *
def calc_dns_and_rir_name() -> DnsRirResponse:
    """Return the RIR and DNS information for the backscatter traffic.

    Thin wrapper around ``calc_rir_and_dns_information``, fed with the
    notebook-level ``backscatter_traffic`` and
    ``backscatter_traffic_unique_attack_resp_ip`` frames.
    """
    return calc_rir_and_dns_information(
        backscatter_traffic=backscatter_traffic,
        backscatter_traffic_unique_attack_resp_ip=backscatter_traffic_unique_attack_resp_ip,
    )

dns_rir_response: DnsRirResponse = calc_dns_and_rir_name()
dns_rir_response.attack_server_name


### show attacked dns and rir name

In [None]:
from src.d03_visiualisation.bar_diagram import *
from src.d04_analysation.count_percent import *


def show_attacked_server_name(attack_server_name: pd.DataFrame):
    """Plot 20 rows of the RIR-name / reverse-DNS result table.

    Named differently from the zero-argument ``show_attacked_server_type``
    defined earlier in the notebook: reusing that name here replaced the
    earlier function, so re-running its cell failed with a ``TypeError``.

    Args:
        attack_server_name: Result table with the columns ``result`` and ``total``.

    Returns:
        The rows that were plotted (first row skipped, then at most 20 rows).
    """
    # Skip the leading row, then limit the chart to 20 entries.
    top_server_names = attack_server_name.drop(index=attack_server_name.index[0]).head(20)

    seaborn_bar_diagram(
        df=top_server_names,
        x_column='result',
        y_column='total',
        x_lable='rir-Name / DNS-Reverse Eintrag',
        y_lable='Anzahl backscatter-Anfragen',
        rotation=30,
        saving_name='ddos_rirname',
    ).show()

    return top_server_names


ddos_attack_server_name = show_attacked_server_name(dns_rir_response.attack_server_name)
ddos_attack_server_name

### calc darknet backscatter

In [None]:
# Load the latest stored dataframe for the `mbackscatterconn` log type
# (per the section header and the variable name, the darknet / CAIDA data).
caida_backscatter = load_latest_dataframe(LogNamesEnum.mbackscatterconn)
# Number of loaded rows.
print(len(caida_backscatter))

In [None]:
# One row per distinct 'id.orig_p' value; the first occurrence is kept.
# NOTE(review): 'id.orig_p' is later merged against the IP column 'id.resp_h';
# confirm whether 'id.orig_h' was intended here.
caida_backscatter_unique = caida_backscatter.drop_duplicates(subset=['id.orig_p'])


In [None]:
# Number of rows per distinct 'id.orig_p' value.
# NOTE(review): see the merge on 'id.resp_h' further down -- verify the column choice.
caida_backscatter_count = caida_backscatter['id.orig_p'].value_counts()

In [None]:
# Turn the value counts into a two-column frame with explicit column names.
caida_backscatter_count_df = caida_backscatter_count.reset_index()
caida_backscatter_count_df.columns = ['id.orig_p', 'count']

In [None]:
# Both names refer to the same frame taken from the DNS / RIR response.
# NOTE(review): `darknet_backscatter` is filled from `dns_rir_response`, not
# from `caida_backscatter` -- confirm this is the intended data source.
backscatter_dns = darknet_backscatter = dns_rir_response.backscatter_dns

# First row per responder IP.
darknet_backscatter_unique: pd.DataFrame = darknet_backscatter.drop_duplicates(subset=['id.resp_h'])

### show darknet comparison

In [None]:
from src.d03_visiualisation.bar_diagram import *
def show_darknet_comparison():
    """Compare attack and connection counts of the two telescopes in a bar chart.

    The cloud-telescope frames are first reduced to rows whose ``ts`` lies
    outside the window 2025-01-25 11:00 to 2025-01-27 19:00 (named "down time"
    in the code; presumably an outage of one data source -- TODO confirm).
    The chart is saved under the name 'ddos_caida_comparison'.

    Returns:
        A DataFrame with the columns ``labels``, ``Angriffe`` and ``Verbindungen``.
    """
    down_time_start = pd.Timestamp("2025-01-25 11:00:00").timestamp()
    down_time_end = pd.Timestamp("2025-01-27 19:00:00").timestamp()

    def _outside_down_time(frame: pd.DataFrame) -> pd.DataFrame:
        # Keep rows strictly before the start or strictly after the end of the window.
        return frame[(frame['ts'] < down_time_start) | (frame['ts'] > down_time_end)]

    backscatter_traffic_unique_attack_resp_ip_time = _outside_down_time(backscatter_traffic_unique_attack_resp_ip)
    backscatter_traffic_time = _outside_down_time(backscatter_traffic)

    darknet_comparison: pd.DataFrame = pd.DataFrame({
        'labels': ["Cloud-Teleskop", 'CAIDA-Teleskop'],
        # `darknet_backscatter_unique` (one row per responder IP) replaces the
        # name `darknet_backscatter_resp_ips`, which is not defined anywhere in
        # the notebook and raised a NameError on a fresh run.
        'Angriffe': [len(backscatter_traffic_unique_attack_resp_ip_time), len(darknet_backscatter_unique)],
        'Verbindungen': [len(backscatter_traffic_time), len(darknet_backscatter)]
    })

    plot_overlapping_two_bars(
        darknet_comparison['Angriffe'].tolist(),
        darknet_comparison['Verbindungen'].tolist(),
        darknet_comparison['labels'].tolist(),
        saving_name='ddos_caida_comparison',
        legend=True,
        label_1='Angriffe',
        label_2='Verbindungen',
        width=4,
        height=5,
        gap=0.12,
        bar_width=0.1,
    ).show()
    return darknet_comparison
    
show_darknet_comparison()

### show darknet backscatter

In [None]:
from src.d04_analysation.backscatter import *
from src.d04_analysation.reverse_dns_lookup import *
# Attach the RIR name / reverse-DNS result to the 20 most frequent entries of
# the CAIDA count table.
# NOTE(review): the join matches 'id.orig_p' against the IP column 'id.resp_h';
# confirm that 'id.orig_p' really holds addresses in this dataset.
merge_rir_info_df = dns_rir_response.backscatter_dns[['id.resp_h', 'result']]
caida_backscatter_count_df_top20 = caida_backscatter_count_df.head(20)
caida_backscatter_count_df_top20_rir = caida_backscatter_count_df_top20.merge(
    merge_rir_info_df,
    how='left',
    left_on='id.orig_p',
    right_on='id.resp_h',
)

# Result table per 'result' value of the darknet frame (not used by the chart below).
darknet_backscatter_calculation: pd.DataFrame = column_calculation(
    darknet_backscatter, {'result': -1}, False
)[0].result_as_df

seaborn_bar_diagram(
    df=caida_backscatter_count_df_top20_rir,
    x_column='result',
    y_column='count',
    x_lable='rir-Name / DNS-Reverse Eintrag',
    y_lable='Anzahl backscatter-Anfragen',
    rotation=30,
    saving_name='caida_ddos_rirname',
).show()

In [None]:
# Display one row per distinct 'result' value (first occurrence kept); the
# de-duplicated frame is only shown, not stored.
caida_backscatter_count_df_top20_rir.drop_duplicates(subset='result', keep='first')

### Calculate ICMP backscatter traffic (if the attacked server is out of memory)

In [None]:
from src.d04_analysation.icmp_backscatter_analysis import *

def icmp_analysis():
    """Return the ICMP backscatter mapping for the current notebook data.

    Thin wrapper around ``map_icmp_backscatter``, fed with the full ``conn``
    log, the unique-attack frame and the complete backscatter frame.
    """
    return map_icmp_backscatter(
        conn=conn,
        backscatter_traffic_unique=backscatter_traffic_unique_attack,
        backscatter_traffic_complete=backscatter_traffic,
    )

icmp_backscatter_mapped = icmp_analysis()
icmp_backscatter_mapped