To use the bagging method to evaluate the dataset using both Random Forest (RF) and Deep Neural Network (DNN), you can follow these steps:

- Read the data from CSV files in both train and test folders.
- Preprocess the data, including encoding categorical variables and scaling numerical variables.
- Split the data into training and testing sets.
- Implement bagging using Random Forest and DNN.
- Evaluate the performance of each model.

In [None]:
from google.colab import drive
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

!pip install scikeras[tensorflow]

# Mount Google Drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Function to read data from CSV files
def read_data_from_csv(folder_path):
    dfs = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".csv"):
            file_path = os.path.join(folder_path, filename)
            df = pd.read_csv(file_path)
            dfs.append(df)
    return pd.concat(dfs, ignore_index=True)

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Function to create DNN model
def create_dnn_model():
    model = Sequential([
        Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


In [None]:
# Read data from CSV files
train_data = read_data_from_csv('/content/drive/My Drive/datasets/train')
test_data = read_data_from_csv('/content/drive/My Drive/datasets/test')



In [None]:
# Preprocess the data
# Assuming label is the target variable
X_train = train_data.drop(columns=['label'])
y_train = train_data['label']
X_test = test_data.drop(columns=['label'])
y_test = test_data['label']

# Concatenate training and testing data to ensure consistent label encoding
combined_data = pd.concat([X_train, X_test])

# Encode categorical variables
encoder = LabelEncoder()
combined_data['Protocol Type'] = encoder.fit_transform(combined_data['Protocol Type'])




In [None]:
# Split back into training and testing data
X_train['Protocol Type'] = combined_data['Protocol Type'][:len(X_train)]
X_test['Protocol Type'] = combined_data['Protocol Type'][len(X_train):]

In [None]:
# Scaling numerical variables
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:

# Bagging with Random Forest
rf_base_estimator = DecisionTreeClassifier()
rf_bagging = BaggingClassifier(base_estimator=rf_base_estimator, n_estimators=10, random_state=0)
rf_bagging.fit(X_train_scaled, y_train)
rf_predictions = rf_bagging.predict(X_test_scaled)





In [None]:
# Evaluate Random Forest
print("Random Forest Results:")
print(classification_report(y_test, rf_predictions))
print("Accuracy:", accuracy_score(y_test, rf_predictions))


Random Forest Results:
                         precision    recall  f1-score   support

       Backdoor_Malware       0.79      0.57      0.66       267
          BenignTraffic       0.91      0.98      0.94     83268
       BrowserHijacking       0.84      0.58      0.68       426
       CommandInjection       0.84      0.60      0.70       378
 DDoS-ACK_Fragmentation       1.00      1.00      1.00     21759
        DDoS-HTTP_Flood       0.99      1.00      1.00      2163
        DDoS-ICMP_Flood       1.00      1.00      1.00    544992
DDoS-ICMP_Fragmentation       1.00      1.00      1.00     34273
      DDoS-PSHACK_Flood       1.00      1.00      1.00    310418
       DDoS-RSTFINFlood       1.00      1.00      1.00    305877
         DDoS-SYN_Flood       1.00      1.00      1.00    307639
         DDoS-SlowLoris       0.99      0.99      0.99      1789
DDoS-SynonymousIP_Flood       1.00      1.00      1.00    271629
         DDoS-TCP_Flood       1.00      1.00      1.00    340918
 

In [None]:
from scikeras.wrappers import KerasClassifier

In [12]:
# Define the base DNN classifier
base_dnn_classifier = KerasClassifier(build_fn=create_dnn_model, epochs=10, batch_size=32, verbose=0)

# Initialize BaggingClassifier with the base DNN classifier
dnn_bagging = BaggingClassifier(base_estimator=base_dnn_classifier, n_estimators=10, random_state=0)

# Fit the BaggingClassifier
dnn_bagging.fit(X_train_scaled, y_train)


  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)
  X, y = self._initialize(X, y)


In [None]:
# Make predictions
dnn_predictions = dnn_bagging.predict(X_test_scaled)

In [None]:

# Evaluate DNN
print("\nDNN Results:")
print(classification_report(y_test, dnn_predictions))


In [None]:

print("Accuracy:", accuracy_score(y_test, dnn_predictions))