In [None]:
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder

# Read the dataset CSV from the working directory and print a summary of
# its columns, dtypes and non-null counts.
dataset_path = 'APA-DDoS-Dataset.csv'
data = pd.read_csv(dataset_path)
data.info()

In [None]:
# Step 2: Data Cleaning and Preprocessing
# Columns removed before modelling: the 'frame_time' column and the two IP
# columns. The IP columns are dropped outright rather than label-encoded.
dropped_columns = ['frame_time', 'ip_dst', 'ip_src']

# Discard every row that contains a missing value
data = data.dropna()

# Replace the class labels with integer codes. The fitted encoder is kept in
# `label_encoder`, so codes can be mapped back with inverse_transform.
label_encoder = LabelEncoder()
data['label'] = label_encoder.fit_transform(data['label'])

# Remove all unwanted columns in a single call
# (extend `dropped_columns` if other columns turn out to be unsuitable for KNN)
data = data.drop(columns=dropped_columns)
data.info()

In [None]:
# Preview the first rows of `data` (head() shows five rows by default)
data.head()

In [None]:
# Step 4: K-Nearest Neighbors (KNN) Classification
# Separate features and target variable
X = data.drop(columns='label')
y = data['label']

# Hold out 20% of the rows for testing. stratify=y keeps the class
# proportions the same in the training and test partitions, so the metrics
# are not skewed by an uneven draw of any class; random_state fixes the
# shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Initialize the KNN classifier
# NOTE(review): KNN is distance-based and the features are passed in
# unscaled -- consider standardising them if the columns differ a lot in
# magnitude.
k_value = 5  # You can set the desired value for k here
knn_classifier = KNeighborsClassifier(n_neighbors=k_value)

# Preview the first rows of the held-out features
X_test.head()

In [None]:
# Train the classifier
knn_classifier.fit(X_train, y_train)

# Write the fitted classifier to disk with joblib.
# joblib is imported with the other dependencies at the top of the notebook
# rather than in the middle of this cell.
filename = 'knn_classifier_model_2.pkl'
joblib.dump(knn_classifier, filename)
print(f"Model saved as {filename}")

In [None]:
# Display the test-set feature frame produced by train_test_split
X_test

In [None]:
# Predict labels for the held-out rows
y_pred = knn_classifier.predict(X_test)

# Overall fraction of correct predictions, shown to two decimals
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Text report from classification_report, followed by the confusion matrix
class_report = classification_report(y_test, y_pred)
print(class_report)
conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)