In [None]:
import numpy as np
import pandas as pd
from minisom import MiniSom
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from joblib import Parallel, delayed

# Read the labelled packet trace.
data = pd.read_csv("labeled_ddostrace.to-victim.20070804.csv")

# Address columns are cast to str first, so a missing address becomes the
# text 'nan' and does not cause its row to be dropped below.
for ip_column in ('Source IP', 'Destination IP'):
    data[ip_column] = data[ip_column].astype(str)

# Numeric columns: any value that cannot be parsed is coerced to NaN.
for numeric_column in (
    'Protocol',
    'Frame Length',
    'Port Used',
    'Interpacket Time',
    'Entropy',
    'label',
):
    data[numeric_column] = pd.to_numeric(data[numeric_column], errors='coerce')

# Discard every row that still holds a missing value in any column.
data = data.dropna()

# Feature matrix = every column except the label and the identifying columns.
non_feature_columns = ['label', 'Timestamp', 'Source IP', 'Destination IP']
X = data.drop(columns=non_feature_columns).values
y = data['label'].values

# Min-max scale the features; the scaler is fitted on the full matrix.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Define SOM parameters
som_grid_rows = 10
som_grid_columns = 10
input_len = X_scaled.shape[1]  # one SOM weight per scaled feature column
num_iterations = 10000
# Fixed seed so that weight initialisation and random sample selection are
# repeatable across "Restart Kernel -> Run All" runs.
som_random_seed = 42

# Build the SOM and initialise its weights from samples of the training data
som = MiniSom(
    som_grid_rows,
    som_grid_columns,
    input_len,
    sigma=1.0,
    learning_rate=0.5,
    random_seed=som_random_seed,
)
som.random_weights_init(X_scaled)

def train_som_iteration(X, n_iterations=1):
    """Train the module-level ``som`` in place on randomly picked rows of ``X``.

    Parameters
    ----------
    X : array-like
        Training samples, passed straight to ``som.train_random``.
    n_iterations : int, default 1
        Number of training iterations to run in this single call.
    """
    som.train_random(X, n_iterations)

# Train sequentially in the notebook process, in ONE call.
# Dispatching single-iteration trainings through joblib's default
# process-based backend runs each task on a pickled copy of `som` inside a
# worker, so none of the weight updates ever reach the `som` used below.
# A single call also lets the training run span all `num_iterations`
# instead of restarting from iteration 0 on every call.
train_som_iteration(X_scaled, num_iterations)

# Find the winning neurons for each sample
winning_neurons = np.array([som.winner(x) for x in X_scaled])

total_samples = len(winning_neurons)

# Convert labels to binary (assuming 0 for non-DDoS and 1 for DDoS)
y_binary = np.where(y > 0, 1, 0)

def assign_cluster_label(neuron):
    """Return the fraction of DDoS samples among the samples won by ``neuron``.

    Parameters
    ----------
    neuron : sequence of two ints
        (row, column) grid coordinates of a SOM neuron.

    Returns
    -------
    float
        Mean of ``y_binary`` over the samples whose winning neuron is
        ``neuron``. For labels that are already 0/1 this equals the mean of
        the raw labels. The result is NaN when no sample maps to the neuron.
    """
    in_cluster = (winning_neurons[:, 0] == neuron[0]) & (winning_neurons[:, 1] == neuron[1])
    return y_binary[in_cluster].mean()

# Score each DISTINCT winning neuron once and broadcast the score back to the
# samples. Scoring once per sample repeats a full scan of `winning_neurons`
# for every row, which is quadratic in the number of packets.
unique_neurons, sample_to_neuron = np.unique(winning_neurons, axis=0, return_inverse=True)
neuron_ddos_fraction = np.array([assign_cluster_label(neuron) for neuron in unique_neurons])
# ravel() keeps the index 1-D regardless of the shape NumPy returns for it.
cluster_labels = neuron_ddos_fraction[sample_to_neuron.ravel()]

# Print progress
print("Assigning cluster labels: 100.00% complete")

# Majority vote: a cluster is DDoS only when more than half of its samples
# are DDoS. Thresholding the fraction at "> 0" would flag a cluster as DDoS
# as soon as it contains a single DDoS sample.
cluster_labels_binary = np.where(cluster_labels > 0.5, 1, 0)

# Evaluate accuracy (measured on the same samples the SOM was trained on)
accuracy = accuracy_score(y_binary, cluster_labels_binary)
print("Accuracy:", accuracy)


In [2]:
import pandas as pd
import numpy as np

# Read the packet trace into a DataFrame
df = pd.read_csv('labeled_ddostrace.to-victim.20070804.csv')

# Relative frequency of every distinct source address
source_ips = df['Source IP']
unique_values = source_ips.unique()
probabilities = source_ips.value_counts(normalize=True)

# Shannon entropy (base 2) of the source-address distribution
entropy = -(probabilities * np.log2(probabilities)).sum()

print("Entropy of Source IP addresses:", entropy)


  df = pd.read_csv('labeled_ddostrace.to-victim.20070804.csv')


Entropy of Source IP addresses: 8.633429095354296


In [3]:
# One DataFrame row per packet, so the row count is the packet count.
total_packets = df.shape[0]
print("Total number of packets:", total_packets)


Total number of packets: 1134552


In [5]:
# Function to calculate entropy
def calculate_entropy(probabilities):
    """Return the Shannon entropy, in bits, of a discrete distribution.

    Parameters
    ----------
    probabilities : array-like of float
        Probability of each outcome, e.g. the result of
        ``Series.value_counts(normalize=True)``.

    Returns
    -------
    float
        ``-sum(p * log2(p))`` taken over the strictly positive entries.
        Entries that are not strictly positive are skipped, following the
        convention ``0 * log2(0) = 0``; without this a zero probability
        would turn the whole result into NaN.
    """
    p = np.asarray(probabilities, dtype=float)
    p = p[p > 0]
    return -np.sum(p * np.log2(p))

# Relative frequency tables for the columns of interest
source_ip_probabilities = df['Source IP'].value_counts(normalize=True)
# NOTE(review): the source-port and destination-port distributions are both
# read from 'Port Used', so their entropies are always equal. Confirm whether
# separate source/destination port columns were intended.
source_port_probabilities = df['Port Used'].value_counts(normalize=True)
dest_port_probabilities = df['Port Used'].value_counts(normalize=True)
protocol_probabilities = df['Protocol'].value_counts(normalize=True)

# One entropy value per frequency table
(
    source_ip_entropy,
    source_port_entropy,
    dest_port_entropy,
    protocol_entropy,
) = (
    calculate_entropy(distribution)
    for distribution in (
        source_ip_probabilities,
        source_port_probabilities,
        dest_port_probabilities,
        protocol_probabilities,
    )
)

total_packets = len(df)

# Each value is a dataset-wide scalar, so every row receives the same number.
new_columns = {
    'Entropy of Source IP': source_ip_entropy,
    'Entropy of Source Port': source_port_entropy,
    'Entropy of Destination Port': dest_port_entropy,
    'Entropy of Protocol': protocol_entropy,
    'Total Packets': total_packets,
}
for column_name, constant_value in new_columns.items():
    df[column_name] = constant_value

# Write the augmented frame to disk without the index column
df.to_csv('modified_data.csv', index=False)