In [None]:
# Anomaly detection using machine learning and Python

import pandas as pd
import tensorflow as tf
from tensorflow.keras.utils import get_file

# Fetch the gzipped 10% KDD Cup 99 extract through Keras' get_file helper and
# keep the local path it returns.
path = get_file(
    fname='kddcup.data_10_percent.gz',
    origin='http://kdd.ics.uci.edu/databases/kddcup99/kddcup.data_10_percent.gz',
)

# header=None: the first line is read as data, not as column names; readable
# names are attached to the frame in a later cell.
data = pd.read_csv(path, header=None)

In [None]:
# Column names for the headerless CSV: 41 feature names followed by the label.
# The visual grouping below is for readability only and appears to follow the
# KDD Cup 99 feature description (basic / content / time-based traffic /
# host-based traffic) — NOTE(review): confirm against the dataset docs.
columns = [
    # connection basics
    "duration",
    "protocol_type",
    "service",
    "flag",
    "src_bytes",
    "dst_bytes",
    "land",
    "wrong_fragment",
    "urgent",
    # content-related
    "hot",
    "num_failed_logins",
    "logged_in",
    "num_compromised",
    "root_shell",
    "su_attempted",
    "num_root",
    "num_file_creations",
    "num_shells",
    "num_access_files",
    "num_outbound_cmds",
    "is_host_login",
    "is_guest_login",
    # traffic statistics
    "count",
    "srv_count",
    "serror_rate",
    "srv_serror_rate",
    "rerror_rate",
    "srv_rerror_rate",
    "same_srv_rate",
    "diff_srv_rate",
    "srv_diff_host_rate",
    # destination-host statistics
    "dst_host_count",
    "dst_host_srv_count",
    "dst_host_same_srv_rate",
    "dst_host_diff_srv_rate",
    "dst_host_same_src_port_rate",
    "dst_host_srv_diff_host_rate",
    "dst_host_serror_rate",
    "dst_host_srv_serror_rate",
    "dst_host_rerror_rate",
    "dst_host_srv_rerror_rate",
    # target label
    "attack_type",
]

# Replace the frame's existing column labels with the names above
# (pandas raises if the count does not match the number of columns).
data.columns = columns

In [None]:
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler  # was used below without being imported

# Separate the features from the label column.
X = data.drop(["attack_type"], axis=1)
y = data["attack_type"]

# StandardScaler only accepts numeric input, so every non-numeric feature column
# is one-hot encoded first. Encoding happens before the split so the train and
# test frames are guaranteed to share the same dummy columns; dtype=float keeps
# the encoded frame purely numeric.
categorical_columns = X.select_dtypes(exclude="number").columns
X_encoded = pd.get_dummies(X, columns=list(categorical_columns), dtype=float)

# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.3, random_state=42
)

# Fit the scaler on the training rows only, then apply the same statistics to
# the test rows so no information from the hold-out set leaks into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
# Train a 1000-tree random forest on the scaled training features. fit() returns
# the estimator itself, so construction and fitting are chained.
random_forest_model = RandomForestClassifier(
    n_estimators=1000,
    random_state=42,
).fit(X_train_scaled, y_train)
rf_predictions = random_forest_model.predict(X_test_scaled)

# Score the hold-out predictions with macro averaging. Only precision overrides
# zero_division (set to 1); recall and F1 keep the library default.
rf_accuracy = accuracy_score(y_test, rf_predictions)
rf_precision = precision_score(
    y_test, rf_predictions, average='macro', zero_division=1
)
rf_recall = recall_score(y_test, rf_predictions, average='macro')
rf_f1 = f1_score(y_test, rf_predictions, average='macro')

# Print one "<label> <value>" line per metric under the model heading.
print("Random Forest:")
for metric_label, metric_value in (
    ("Accuracy:", rf_accuracy),
    ("Precision:", rf_precision),
    ("Recall:", rf_recall),
    ("F1 Score:", rf_f1),
):
    print(metric_label, metric_value)

In [None]:
from sklearn.svm import SVC
# Train an RBF-kernel support vector classifier on the scaled training features.
# fit() returns the estimator itself, so construction and fitting are chained.
svm_model = SVC(
    kernel='rbf',
    random_state=42,
).fit(X_train_scaled, y_train)
svm_predictions = svm_model.predict(X_test_scaled)

# Score the hold-out predictions with macro averaging. Only precision overrides
# zero_division (set to 1); recall and F1 keep the library default.
svm_accuracy = accuracy_score(y_test, svm_predictions)
svm_precision = precision_score(
    y_test, svm_predictions, average='macro', zero_division=1
)
svm_recall = recall_score(y_test, svm_predictions, average='macro')
svm_f1 = f1_score(y_test, svm_predictions, average='macro')

# Print one "<label> <value>" line per metric under the model heading
# (the heading starts with a blank line).
print("\nSVM:")
for metric_label, metric_value in (
    ("Accuracy:", svm_accuracy),
    ("Precision:", svm_precision),
    ("Recall:", svm_recall),
    ("F1 Score:", svm_f1),
):
    print(metric_label, metric_value)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Grouped bar chart comparing the two models on the four metrics computed above.
labels = ['Accuracy', 'Precision', 'Recall', 'F1']
random_tree = [rf_accuracy, rf_precision, rf_recall, rf_f1]  # random forest scores
SVM = [svm_accuracy, svm_precision, svm_recall, svm_f1]

x = np.arange(len(labels))  # one tick position per metric
width = 0.25  # width of a single bar

fig, ax = plt.subplots(figsize=(5, 5))
# With two series, shifting each bar by half a bar width puts the pair side by
# side and centred on its tick. (A full-width shift left an empty bar-sized slot
# directly above every tick label.)
rects1 = ax.bar(x - width / 2, random_tree, width, label='Random Forest')
rects3 = ax.bar(x + width / 2, SVM, width, label='SVM')

ax.set_xlabel('Metrics')
ax.set_ylabel('Scores')
ax.set_title('Scores by metric and algorithm')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()

plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Side-by-side confusion-matrix heatmaps for the random forest and the SVM.

# Use one sorted class list for both models: it fixes the row/column order, keeps
# the two matrices the same shape even if a model never predicts some class, and
# lets the heatmap axes show class names instead of integer positions.
class_labels = sorted(set(y_test) | set(rf_predictions) | set(svm_predictions))

rf_conf_matrix = confusion_matrix(y_test, rf_predictions, labels=class_labels)
svm_conf_matrix = confusion_matrix(y_test, svm_predictions, labels=class_labels)

# Wider figure than a default two-panel layout so the per-class tick labels and
# cell annotations have room.
fig, ax = plt.subplots(1, 2, figsize=(22, 9))

for axis, matrix, title in (
    (ax[0], rf_conf_matrix, 'Random Forest Confusion Matrix'),
    (ax[1], svm_conf_matrix, 'SVM Confusion Matrix'),
):
    sns.heatmap(
        matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_labels,
        yticklabels=class_labels,
        ax=axis,
    )
    axis.set_title(title)
    axis.set_xlabel('Predicted')
    axis.set_ylabel('Actual')

fig.tight_layout()
plt.show()