In [None]:
from google.colab import drive
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Mount Google Drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Function to read data from CSV files
def read_data_from_csv(folder_path):
    dfs = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".csv"):
            file_path = os.path.join(folder_path, filename)
            df = pd.read_csv(file_path)
            dfs.append(df)
    return pd.concat(dfs, ignore_index=True)

# Path to train and test folders
train_folder = '/content/drive/My Drive/datasets/train'
test_folder = '/content/drive/My Drive/datasets/test'


In [None]:
# Read data from train and test folders
train_data = read_data_from_csv(train_folder)
test_data = read_data_from_csv(test_folder)


In [None]:
# Split data into features and target variable
X_train = train_data.drop(columns=['label'])
y_train = train_data['label']
X_test = test_data.drop(columns=['label'])
y_test = test_data['label']


In [None]:
# Encode labels if they are not numerical
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)


In [None]:
# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Initialize Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

In [None]:
# Train Random Forest model
rf_model.fit(X_train, y_train)

In [None]:
# Predict labels using RF model
y_pred_rf = rf_model.predict(X_test)

In [None]:
# Convert predictions to integer arrays
y_pred_rf = label_encoder.transform(y_pred_rf)


In [None]:
# Use the predictions of the Random Forest model as additional features
X_train_with_rf_pred = np.column_stack((X_train_scaled[:len(y_pred_rf)], y_pred_rf))
X_test_with_rf_pred = np.column_stack((X_test_scaled[:len(y_pred_rf)], y_pred_rf))


In [None]:
# # Define the DNN model
# dnn_model = Sequential()
# dnn_model.add(Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)))
# dnn_model.add(Dense(64, activation='relu'))
# dnn_model.add(Dense(len(label_encoder.classes_), activation='softmax')) # Output layer

dnn_model_with_rf = Sequential()
dnn_model_with_rf.add(Dense(64, activation='relu', input_shape=(X_train_with_rf_pred.shape[1],)))
dnn_model_with_rf.add(Dense(64, activation='relu'))
dnn_model_with_rf.add(Dense(len(label_encoder.classes_), activation='softmax')) # Output layer



In [None]:
# # Compile the DNN model
# dnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Compile the DNN model
dnn_model_with_rf.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [None]:
# # Train the DNN model
# dnn_model.fit(X_train_scaled, y_train_encoded, epochs=20, batch_size=32, validation_split=0.2, verbose=1)


# Train the DNN model with Random Forest predictions as features
dnn_model_with_rf.fit(X_train_with_rf_pred, y_train_encoded, epochs=20, batch_size=32, validation_split=0.2, verbose=1)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7a37db078e50>

In [None]:
# Evaluate the performance of the DNN model with Random Forest predictions as features
y_pred_dnn_with_rf = np.argmax(dnn_model_with_rf.predict(X_test_with_rf_pred), axis=1)




In [None]:
# Calculate accuracy
accuracy_with_rf = accuracy_score(y_test_encoded, y_pred_dnn_with_rf)
print("Accuracy with Random Forest predictions as features:", accuracy_with_rf)


Accuracy with Random Forest predictions as features: 0.983119990060427


In [None]:
# Print classification report
print("Classification Report with Random Forest predictions as features:")
print(classification_report(y_test_encoded, y_pred_dnn_with_rf))


Classification Report with Random Forest predictions as features:


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       267
           1       0.70      0.98      0.81     83268
           2       0.00      0.00      0.00       426
           3       0.00      0.00      0.00       378
           4       1.00      0.98      0.99     21759
           5       0.93      0.91      0.92      2163
           6       1.00      1.00      1.00    544992
           7       0.99      0.98      0.99     34273
           8       1.00      1.00      1.00    310418
           9       1.00      1.00      1.00    305877
          10       1.00      1.00      1.00    307639
          11       0.55      0.72      0.63      1789
          12       1.00      1.00      1.00    271629
          13       1.00      1.00      1.00    340918
          14       1.00      1.00      1.00    409362
          15       1.00      0.98      0.99     21861
          16       0.36      0.14      0.20     13586
          17       1.00    

  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Compute confusion matrix
cm_with_rf = confusion_matrix(y_test_encoded, y_pred_dnn_with_rf)
print("Confusion Matrix with Random Forest predictions as features:")
print(cm_with_rf)

In [None]:
# Predict probabilities for each class using DNN model
y_pred_prob_dnn = dnn_model.predict(X_test_scaled)



In [None]:
# Predict class labels using DNN model
y_pred_dnn = np.argmax(y_pred_prob_dnn, axis=1)


In [None]:
# Combine predictions (simple voting)
y_pred_hybrid = (y_pred_rf + y_pred_dnn) / 2


In [None]:
# Round the combined predictions to the nearest integer
y_pred_hybrid = np.round(y_pred_hybrid).astype(int)


In [None]:
# Evaluate the combined predictions
combined_accuracy = accuracy_score(y_test, y_pred_hybrid)
print("Combined Model Accuracy:", combined_accuracy)


Combined Model Accuracy: 0.0


In [None]:
# Convert predictions from the hybrid model to match the data type of y_test
y_pred_hybrid_encoded = label_encoder.inverse_transform(np.round(y_pred_hybrid).astype(int))

# Print classification report for the combined model
print("Classification Report for Combined Model:")
print(classification_report(y_test, y_pred_hybrid_encoded))


Classification Report for Combined Model:


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                         precision    recall  f1-score   support

       Backdoor_Malware       0.50      0.01      0.02       267
          BenignTraffic       0.92      0.89      0.91     83268
       BrowserHijacking       0.53      0.10      0.17       426
       CommandInjection       0.00      0.00      0.00       378
 DDoS-ACK_Fragmentation       1.00      0.97      0.99     21759
        DDoS-HTTP_Flood       0.99      0.65      0.79      2163
        DDoS-ICMP_Flood       1.00      1.00      1.00    544992
DDoS-ICMP_Fragmentation       1.00      0.97      0.99     34273
      DDoS-PSHACK_Flood       0.98      1.00      0.99    310418
       DDoS-RSTFINFlood       1.00      1.00      1.00    305877
         DDoS-SYN_Flood       1.00      0.99      0.99    307639
         DDoS-SlowLoris       0.01      0.01      0.01      1789
DDoS-SynonymousIP_Flood       0.96      1.00      0.98    271629
         DDoS-TCP_Flood       1.00      1.00      1.00    340918
         DDoS-UDP_Flood 

  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
cm_combined = confusion_matrix(y_test, y_pred_hybrid_encoded)
print("Confusion Matrix for Combined Model:")
print(cm_combined)

Confusion Matrix for Combined Model:
[[    3    15     3 ...     0     0     0]
 [    0 74436     1 ...     0     0     0]
 [    0    41    44 ...     0     0     0]
 ...
 [    0    15     5 ...     0     0     0]
 [    0     1     0 ...     0  2147     0]
 [    1    24     8 ...     0     0     0]]
