In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
# Load datasets
file_paths = [
    r"E:/Random Python Scripts/CICIDS/CICIDS2017/MachineLearningCVE/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv",
    r"E:/Random Python Scripts/CICIDS/CICIDS2017/MachineLearningCVE/Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv",
    r"E:/Random Python Scripts/CICIDS/CICIDS2017/MachineLearningCVE/Friday-WorkingHours-Morning.pcap_ISCX.csv",
    r"E:/Random Python Scripts/CICIDS/CICIDS2017/MachineLearningCVE/Monday-WorkingHours.pcap_ISCX.csv",
    r"E:/Random Python Scripts/CICIDS/CICIDS2017/MachineLearningCVE/Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv",
    r"E:/Random Python Scripts/CICIDS/CICIDS2017/MachineLearningCVE/Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv",
    r"E:/Random Python Scripts/CICIDS/CICIDS2017/MachineLearningCVE/Tuesday-WorkingHours.pcap_ISCX.csv",
    r"E:/Random Python Scripts/CICIDS/CICIDS2017/MachineLearningCVE/Wednesday-workingHours.pcap_ISCX.csv"
]

# Concatenate all datasets
df = pd.concat([pd.read_csv(file) for file in file_paths], ignore_index=True)

In [3]:
# Standardize the labels
df[' Label'].replace("Web.*", "Web Attack", regex=True, inplace=True)
df[' Label'].replace(r'.*Patator$', "Brute Force", regex=True, inplace=True)
df[' Label'].replace(["DoS GoldenEye", "DoS Hulk", "DoS Slowhttptest", "DoS slowloris"], "DDoS/DoS", inplace=True)
df[' Label'].replace("DDoS", "DDoS/DoS", inplace=True)
df[' Label'].replace("Heartbleed", "DDoS/DoS", inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[' Label'].replace("Web.*", "Web Attack", regex=True, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[' Label'].replace(r'.*Patator$', "Brute Force", regex=True, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because t

In [4]:
# Remove whitespace from column names
df.columns = [col.strip() for col in df.columns]

In [5]:
# Shuffle the dataset
df = df.sample(frac=1, random_state=8).reset_index(drop=True)

In [6]:
# Encode labels
y = LabelEncoder().fit_transform(df['Label'])

In [7]:
# Drop the label column and preprocess the features
x = df.drop(columns=['Label'], axis=1).astype('float32')

In [None]:
# Save preprocessed features and labels as a CSV file
preprocessed_df = pd.concat([x, pd.Series(y, name='Label')], axis=1)
preprocessed_df.to_csv("preprocessed_data.csv", index=False)
print("Preprocessed data saved as 'preprocessed_data.csv'.")

In [9]:
# Handle missing and infinite values
x.replace([np.inf, -np.inf], np.nan, inplace=True)
x.fillna(x.mean(), inplace=True)
x[x < 0] = np.nan
x.fillna(x.min(), inplace=True)

In [10]:
# Scale the features
scaler = StandardScaler()
x = pd.DataFrame(scaler.fit_transform(x), index=x.index, columns=x.columns)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=8, stratify=y)


In [11]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import seaborn as sns

In [12]:
# Define the architecture
def build_model(input_dim, num_classes):
    # Creating layers
    inputs = Input(shape=(input_dim,))

    x = Dense(units=input_dim, activation='relu')(inputs)  # Level one
    x = BatchNormalization()(x)
    x = Dropout(0.1)(x)

    x = Dense(units=15, activation='relu')(x)  # Level two
    x = BatchNormalization()(x)

    x = Dense(units=7, activation='relu')(x)  # Bottleneck
    x = BatchNormalization()(x)

    x = Dense(units=15, activation='relu')(x)  # Level two
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)

    x = Dense(units=35, activation='relu')(x)  # Level one
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)

    model.compile(
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
        optimizer=Adam(learning_rate=0.001)
    )
    return model

In [13]:
# Early stopping
early_stop = EarlyStopping(monitor='accuracy', patience=5)

In [14]:
# Initialize the model
input_dim = X_train.shape[1]
num_classes = len(np.unique(y_train))
model = build_model(input_dim, num_classes)

In [15]:
# Display model summary
model.summary()

In [16]:
# Train the model with EarlyStopping
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=128,
    callbacks=[early_stop],
    verbose=1
)

Epoch 1/100
[1m12385/12385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 4ms/step - accuracy: 0.9500 - loss: 0.1499 - val_accuracy: 0.9733 - val_loss: 0.0580
Epoch 2/100
[1m12385/12385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 4ms/step - accuracy: 0.9754 - loss: 0.0588 - val_accuracy: 0.9756 - val_loss: 0.0449
Epoch 3/100
[1m12385/12385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 4ms/step - accuracy: 0.9835 - loss: 0.0428 - val_accuracy: 0.9846 - val_loss: 0.0404
Epoch 4/100
[1m12385/12385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 4ms/step - accuracy: 0.9870 - loss: 0.0348 - val_accuracy: 0.9874 - val_loss: 0.0312
Epoch 5/100
[1m12385/12385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 4ms/step - accuracy: 0.9866 - loss: 0.0352 - val_accuracy: 0.9882 - val_loss: 0.0279
Epoch 6/100
[1m12385/12385[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 4ms/step - accuracy: 0.9880 - loss: 0.0307 - val_accuracy: 0.9876 - val_loss:

In [17]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

# Classification report
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
print(classification_report(y_test, y_pred_classes))

Test Loss: 0.01958256959915161
Test Accuracy: 0.9930630922317505
[1m26539/26539[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 2ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.99      1.00      1.00    681929
           1       0.90      0.35      0.50       590
           2       0.98      0.95      0.97      4150
           3       1.00      0.97      0.98    114210
           4       0.00      0.00      0.00        11
           5       0.99      1.00      1.00     47679
           6       0.00      0.00      0.00       654

    accuracy                           0.99    849223
   macro avg       0.69      0.61      0.63    849223
weighted avg       0.99      0.99      0.99    849223



In [19]:
# Save the model
model.save("DNN_MC.h5")
print("Model saved successfully.")



Model saved successfully.
