In [None]:
import math
import subprocess
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

In [None]:
# Download three CSV files from Google Drive by file ID (the cell output below
# shows the names they are saved under).
# NOTE(review): newer gdown releases deprecate the `--id` flag and accept the
# file ID directly — confirm against the installed gdown version.
! gdown --id 1GHH5pz4OP2PWmCKxqSvR0Oq3FwI9mrPA
! gdown --id 1ofjTfvsPyIwel7R8Vmx9XHS_NOdWr6Hd
! gdown --id 1-Hq4hH_Qeo1MWeQeinBVpwaIOFUDY7z2

Downloading...
From: https://drive.google.com/uc?id=1GHH5pz4OP2PWmCKxqSvR0Oq3FwI9mrPA
To: /content/server_logs - Copy.csv
100% 6.71k/6.71k [00:00<00:00, 25.1MB/s]
Downloading...
From: https://drive.google.com/uc?id=1ofjTfvsPyIwel7R8Vmx9XHS_NOdWr6Hd
To: /content/new_server_logs.csv
100% 3.50k/3.50k [00:00<00:00, 10.6MB/s]
Downloading...
From: https://drive.google.com/uc?id=1-Hq4hH_Qeo1MWeQeinBVpwaIOFUDY7z2
To: /content/dataset_archived_big.csv
100% 20.7k/20.7k [00:00<00:00, 35.8MB/s]


In [None]:
def train_detection_model(server_logs_file):
    """Train a random-forest classifier on per-IP traffic features.

    The CSV is expected to provide the columns ``source_ip``, ``timestamp``,
    ``request_method``, ``request_duration``, ``request_url`` and
    ``response_status``. Rows containing any missing value are discarded.
    Each remaining row is labelled 1 when its response status is not 2xx and
    0 otherwise.

    Parameters
    ----------
    server_logs_file : str or path-like
        Path of the CSV file holding the training logs.

    Returns
    -------
    tuple
        ``(rf, imputer)``: the fitted ``RandomForestClassifier`` and the
        ``SimpleImputer`` fitted on the training split.

    Raises
    ------
    ValueError
        If no rows remain after dropping incomplete rows.
    """

    server_logs = pd.read_csv(server_logs_file)
    server_logs = server_logs.dropna()
    if server_logs.empty:
        raise ValueError(f'No complete rows to train on in {server_logs_file!r}.')

    server_logs['request_method'] = server_logs['request_method'].astype('category')
    # NOTE(review): category codes depend on the methods present in this file,
    # so the same method may receive a different code in another file.
    server_logs['request_method_encoded'] = server_logs['request_method'].cat.codes

    scaler = StandardScaler()
    # ravel() turns the (n, 1) scaler output into a plain 1-D column.
    server_logs['request_duration_scaled'] = scaler.fit_transform(
        server_logs['request_duration'].values.reshape(-1, 1)
    ).ravel()

    by_ip = server_logs['source_ip']
    server_logs['request_rate_per_ip'] = server_logs.groupby('source_ip')['timestamp'].transform('count')

    # The per-IP ratio must be broadcast back onto the rows. Assigning the
    # result of groupby(...).apply(...) directly aligns an IP-indexed Series
    # against the row index and yields an all-NaN column, which the imputer
    # then drops.
    is_success = (server_logs['response_status'] // 100 == 2).astype(int)
    is_failure = (server_logs['response_status'] // 100 != 2).astype(int)
    successes_per_ip = is_success.groupby(by_ip).transform('sum')
    failures_per_ip = is_failure.groupby(by_ip).transform('sum')
    # clip(lower=1) avoids dividing by zero for IPs without failures.
    server_logs['success_to_failure_ratio'] = successes_per_ip / failures_per_ip.clip(lower=1)

    server_logs['avg_request_duration_per_ip'] = server_logs.groupby('source_ip')['request_duration_scaled'].transform('mean')
    # Shannon entropy (natural log) of the URLs requested by each IP.
    server_logs['url_entropy_per_ip'] = server_logs.groupby('source_ip')['request_url'].transform(
        lambda x: -sum(p * math.log(p) for p in x.value_counts(normalize=True))
    )

    X = server_logs[['request_rate_per_ip', 'success_to_failure_ratio', 'avg_request_duration_per_ip', 'url_entropy_per_ip', 'request_method_encoded']]
    # NOTE(review): the label and success_to_failure_ratio are both derived
    # from response_status — confirm this is the intended target.
    y = (server_logs['response_status'] // 100 != 2).astype(int)

    # Only the training part of the split is used; the held-out 20% is not
    # evaluated here.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)

    imputer = SimpleImputer(strategy='mean')
    X_train_imputed = imputer.fit_transform(X_train)

    rf = RandomForestClassifier(n_estimators=100, random_state=42)
    rf.fit(X_train_imputed, y_train)

    return rf, imputer

In [None]:
def detect_ddos(new_logs_file, detection_model, imputer):
    """Return the source IPs whose log rows the model classifies as attacks.

    The CSV is expected to provide the columns ``source_ip``, ``timestamp``,
    ``request_method``, ``request_duration``, ``request_url`` and
    ``response_status``. The same five features used for training are rebuilt
    from this file, imputed with ``imputer`` and passed to
    ``detection_model.predict``.

    Parameters
    ----------
    new_logs_file : str or path-like
        Path of the CSV file holding the logs to inspect.
    detection_model : object
        Fitted classifier exposing ``predict``.
    imputer : object
        Fitted imputer exposing ``transform``.

    Returns
    -------
    numpy.ndarray
        Unique ``source_ip`` values having at least one row predicted as 1.
        Empty when the file contains no rows.
    """

    new_logs = pd.read_csv(new_logs_file)
    if new_logs.empty:
        # Nothing to score; the scaler would raise on zero samples.
        return new_logs['source_ip'].unique()

    new_logs['request_method'] = new_logs['request_method'].astype('category')
    # NOTE(review): category codes are derived from this file alone and may
    # not match the codes seen during training — confirm.
    new_logs['request_method_encoded'] = new_logs['request_method'].cat.codes

    # NOTE(review): the scaler is re-fitted on the new data rather than reusing
    # training statistics — confirm this is intended.
    scaler = StandardScaler()
    # ravel() turns the (n, 1) scaler output into a plain 1-D column.
    new_logs['request_duration_scaled'] = scaler.fit_transform(
        new_logs['request_duration'].values.reshape(-1, 1)
    ).ravel()

    by_ip = new_logs['source_ip']
    new_logs['request_rate_per_ip'] = new_logs.groupby('source_ip')['timestamp'].transform('count')

    # The per-IP ratio must be broadcast back onto the rows. Assigning the
    # result of groupby(...).apply(...) directly aligns an IP-indexed Series
    # against the row index and yields an all-NaN column.
    is_success = (new_logs['response_status'] // 100 == 2).astype(int)
    is_failure = (new_logs['response_status'] // 100 != 2).astype(int)
    successes_per_ip = is_success.groupby(by_ip).transform('sum')
    failures_per_ip = is_failure.groupby(by_ip).transform('sum')
    # clip(lower=1) avoids dividing by zero for IPs without failures.
    new_logs['success_to_failure_ratio'] = successes_per_ip / failures_per_ip.clip(lower=1)

    new_logs['avg_request_duration_per_ip'] = new_logs.groupby('source_ip')['request_duration_scaled'].transform('mean')
    # Shannon entropy (natural log) of the URLs requested by each IP.
    new_logs['url_entropy_per_ip'] = new_logs.groupby('source_ip')['request_url'].transform(
        lambda x: -sum(p * math.log(p) for p in x.value_counts(normalize=True))
    )

    X_new = new_logs[['request_rate_per_ip', 'success_to_failure_ratio', 'avg_request_duration_per_ip', 'url_entropy_per_ip', 'request_method_encoded']]
    X_new_imputed = imputer.transform(X_new)
    y_new_pred = detection_model.predict(X_new_imputed)

    attack_ips = new_logs.loc[y_new_pred == 1, 'source_ip'].unique()
    return attack_ips

In [None]:
def mitigate_attack(server_logs, attack_ips, firewall_config_file):
    """Append DROP rules for the given IPs and log each mitigation.

    For every syntactically valid address in ``attack_ips`` one
    ``iptables -A INPUT -s <ip> -j DROP`` line is appended to
    ``firewall_config_file``; afterwards a confirmation is printed and
    ``log_mitigation_action`` is called for that address. Values that are not
    valid IPv4/IPv6 addresses are reported and skipped, so that arbitrary text
    taken from log data never ends up inside a firewall command line.

    Parameters
    ----------
    server_logs : pandas.DataFrame
        Log rows handed to ``log_mitigation_action`` for each blocked address.
    attack_ips : iterable
        Candidate addresses to block. It is consumed once, so generators are
        accepted.
    firewall_config_file : str or path-like
        File to which the rules are appended.
    """
    import ipaddress  # local import: only needed for address validation

    # Pair each original value (used to look up log rows) with the validated
    # textual form that is written into the rule.
    validated = []
    for ip in attack_ips:
        try:
            rule_address = str(ipaddress.ip_address(str(ip).strip()))
        except ValueError:
            print(f'Skipping invalid IP address {ip!r}; no firewall rule written.')
            continue
        validated.append((ip, rule_address))

    with open(firewall_config_file, 'a') as f:
        for _, rule_address in validated:
            f.write(f'iptables -A INPUT -s {rule_address} -j DROP\n')

    for ip, _ in validated:
        print(f'Blocked IP address {ip} due to detected DDoS attack.')
        log_mitigation_action(server_logs, ip)

In [None]:
def log_mitigation_action(server_logs, ip_address):
    """Append a mitigation record for ``ip_address`` to ``mitigation_log.txt``.

    The record consists of a header line with the address and the current
    local time, a ``Related log entries:`` line, one line per row of
    ``server_logs`` whose ``source_ip`` equals ``ip_address`` (showing its
    timestamp, request URL and response status), and a terminating blank line.

    Parameters
    ----------
    server_logs : pandas.DataFrame
        Frame with ``source_ip``, ``timestamp``, ``request_url`` and
        ``response_status`` columns.
    ip_address : str
        Address whose rows are written to the log.
    """

    matching_rows = server_logs.loc[server_logs['source_ip'] == ip_address]

    with open('mitigation_log.txt', 'a') as log_file:
        blocked_at = time.strftime("%Y-%m-%d %H:%M:%S")
        log_file.write(f'Blocked IP address {ip_address} at {blocked_at}\n')
        log_file.write('Related log entries:\n')
        # One line per matching row, written lazily as the rows are iterated.
        log_file.writelines(
            f'- Timestamp: {entry["timestamp"]}, Request URL: {entry["request_url"]}, Response Status: {entry["response_status"]}\n'
            for _, entry in matching_rows.iterrows()
        )
        log_file.write('\n')

In [None]:
# CSV used to train the detection model (passed to train_detection_model).
server_logs_file = 'new_server_logs.csv'
# CSV scanned for attacking IPs with the trained model (passed to detect_ddos).
new_logs_file = 'dataset_archived_big.csv'

In [None]:
# Fit the classifier and the imputer on the training log file.
detection_model, imputer = train_detection_model(server_logs_file)



In [None]:
# Score the new log file and collect the source IPs flagged by the model.
attack_ips = detect_ddos(new_logs_file, detection_model, imputer)



In [None]:
# Only write firewall rules and mitigation logs when at least one IP was flagged.
if len(attack_ips) > 0:
    firewall_config_file = 'firewall_config.txt'
    # The new log file is re-read so each blocked IP's rows can be logged.
    mitigate_attack(pd.read_csv(new_logs_file), attack_ips, firewall_config_file)

Blocked IP address 223.254.74.157 due to detected DDoS attack.
Blocked IP address 58.30.103.184 due to detected DDoS attack.
Blocked IP address 93.47.162.191 due to detected DDoS attack.
Blocked IP address 183.51.44.206 due to detected DDoS attack.
Blocked IP address 85.2.215.227 due to detected DDoS attack.
Blocked IP address 41.204.111.51 due to detected DDoS attack.
Blocked IP address 68.226.160.154 due to detected DDoS attack.
Blocked IP address 65.15.73.124 due to detected DDoS attack.
Blocked IP address 90.212.189.26 due to detected DDoS attack.
Blocked IP address 246.253.50.168 due to detected DDoS attack.
Blocked IP address 124.50.135.209 due to detected DDoS attack.
Blocked IP address 216.11.36.255 due to detected DDoS attack.
Blocked IP address 232.230.137.47 due to detected DDoS attack.
Blocked IP address 48.194.83.108 due to detected DDoS attack.
Blocked IP address 140.163.159.130 due to detected DDoS attack.
Blocked IP address 42.201.211.79 due to detected DDoS attack.
Blo