<a href="https://colab.research.google.com/github/ushasri999/Multiclass-classification-of-DDoS-attacks-in-IoT-network-using-hybrid-feature-selection-algorithm/blob/main/DT_12.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**12 Classes, 15 features**

In [None]:
#5
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.feature_selection import SelectKBest, f_classif, RFE
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, precision_score, f1_score

# Step 1: Load the filtered dataset
df = pd.read_csv('/content/drive/MyDrive/major project/data.csv')  # Load your actual dataset here
df_ddos = df[df['label'].str.contains('DDoS', case=False, na=False)]  # Filter DDoS labels

# Step 2: Separate features and target
X = df_ddos.drop("label", axis=1, errors='ignore')  # Avoid crashing if 'label' does not exist
y = df_ddos["label"] if 'label' in df_ddos.columns else pd.Series([])  # Handle case where label is missing

# Check if X and y are valid
print(X.shape)  # Should not be empty
print(y.shape)  # Should not be empty

# Step 3: Handle categorical and missing data
X = pd.get_dummies(X, drop_first=True)  # One-hot encode categorical features
X.fillna(X.mean(), inplace=True)  # Fill missing values with the mean


(173777, 46)
(173777,)


In [None]:
# Step 4: Hybrid Feature Selection
filter_selector = SelectKBest(score_func=f_classif, k=8)  # Select only top 5 features using filter method
X_new_filter = filter_selector.fit_transform(X, y)
filter_scores = filter_selector.scores_

# Wrapper Method: Recursive Feature Elimination (RFE) with Decision Tree
rfe_selector = RFE(estimator=DecisionTreeClassifier(random_state=42), n_features_to_select=7)  # Select 5 features
X_new_wrapper = rfe_selector.fit_transform(X, y)
rfe_ranking = rfe_selector.ranking_

# Combine results: Select top 5 features from both methods
top_features_filter = np.argsort(filter_scores)[-8:]  # Top 5 features from filter method
print('top_features_filter ', top_features_filter)
top_features_rfe = np.where(rfe_ranking == 1)[0]  # Features selected by RFE
print('top_features_rfe ', top_features_rfe)
top_features_combined = list(set(top_features_filter) | set(top_features_rfe))  # Combine both selections
print('top_features_combined ', top_features_combined)

# Step 5: Select top features from the dataset
X_selected = X.iloc[:, top_features_combined]

# Print the names of the selected features
print("Selected Features:", X_selected.columns.tolist())

  f = msb / msw


top_features_filter  [ 7 28 25 24 23 13 12 22]
top_features_rfe  [ 1  2  7  8 17 34 39]
top_features_combined  [1, 2, 34, 7, 8, 39, 12, 13, 17, 22, 23, 24, 25, 28]
Selected Features: ['Header_Length', 'Protocol Type', 'Min', 'fin_flag_number', 'syn_flag_number', 'IAT', 'ece_flag_number', 'cwr_flag_number', 'urg_count', 'Telnet', 'SMTP', 'SSH', 'IRC', 'DHCP']


In [None]:
from sklearn.metrics import recall_score

# Step 6: Train and Test Split
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.3, random_state=42)

# Step 7: Train Decision Tree Classifier
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# Step 8: Evaluate the model
y_pred = dt_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')  # Weighted for imbalanced classes
recall = recall_score(y_test, y_pred, average='weighted')  # Weighted for imbalanced classes
f1 = f1_score(y_test, y_pred, average='weighted')  # Weighted for imbalanced classes

# Print metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("recall:", recall)
print("F1 Score:", f1)
print("Classification Report:\n", classification_report(y_test, y_pred))

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

Accuracy: 0.9995780105113745
Precision: 0.9995783153181416
recall: 0.9995780105113745
F1 Score: 0.9995778282533108
Classification Report:
                          precision    recall  f1-score   support

 DDoS-ACK_Fragmentation       0.99      0.99      0.99       441
        DDoS-HTTP_Flood       0.96      0.96      0.96        48
        DDoS-ICMP_Flood       1.00      1.00      1.00     10925
DDoS-ICMP_Fragmentation       1.00      1.00      1.00       726
      DDoS-PSHACK_Flood       1.00      1.00      1.00      6409
       DDoS-RSTFINFlood       1.00      1.00      1.00      6131
         DDoS-SYN_Flood       1.00      1.00      1.00      6173
         DDoS-SlowLoris       1.00      0.95      0.98        21
DDoS-SynonymousIP_Flood       1.00      1.00      1.00      5436
         DDoS-TCP_Flood       1.00      1.00      1.00      6982
         DDoS-UDP_Flood       1.00      1.00      1.00      8369
 DDoS-UDP_Fragmentation       1.00      1.00      1.00       473

              

**12 Classes, 10 features**

In [None]:
# Step 4: Hybrid Feature Selection
filter_selector = SelectKBest(score_func=f_classif, k=5)  # Select only top 5 features using filter method
X_new_filter = filter_selector.fit_transform(X, y)
filter_scores = filter_selector.scores_

# Wrapper Method: Recursive Feature Elimination (RFE) with Decision Tree
rfe_selector = RFE(estimator=DecisionTreeClassifier(random_state=42), n_features_to_select=5)  # Select 5 features
X_new_wrapper = rfe_selector.fit_transform(X, y)
rfe_ranking = rfe_selector.ranking_

# Combine results: Select top 5 features from both methods
top_features_filter = np.argsort(filter_scores)[-5:]  # Top 5 features from filter method
print('top_features_filter ', top_features_filter)
top_features_rfe = np.where(rfe_ranking == 1)[0]  # Features selected by RFE
print('top_features_rfe ', top_features_rfe)
top_features_combined = list(set(top_features_filter) | set(top_features_rfe))  # Combine both selections
print('top_features_combined ', top_features_combined)

# Step 5: Select top features from the dataset
X_selected = X.iloc[:, top_features_combined]

# Print the names of the selected features
print("Selected Features:", X_selected.columns.tolist())

  f = msb / msw


top_features_filter  [24 23 13 12 22]
top_features_rfe  [ 7  8 17 34 39]
top_features_combined  [34, 39, 8, 7, 12, 13, 17, 22, 23, 24]
Selected Features: ['Min', 'IAT', 'syn_flag_number', 'fin_flag_number', 'ece_flag_number', 'cwr_flag_number', 'urg_count', 'Telnet', 'SMTP', 'SSH']


In [None]:
from sklearn.metrics import recall_score

# Step 6: Train and Test Split
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.3, random_state=42)

# Step 7: Train Decision Tree Classifier
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# Step 8: Evaluate the model
y_pred = dt_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')  # Weighted for imbalanced classes
recall = recall_score(y_test, y_pred, average='weighted')  # Weighted for imbalanced classes
f1 = f1_score(y_test, y_pred, average='weighted')  # Weighted for imbalanced classes

# Print metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("recall:", recall)
print("F1 Score:", f1)
print("Classification Report:\n", classification_report(y_test, y_pred))

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

Accuracy: 0.9994821038094142
Precision: 0.9994822778394454
recall: 0.9994821038094142
F1 Score: 0.9994814224177551
Classification Report:
                          precision    recall  f1-score   support

 DDoS-ACK_Fragmentation       0.99      1.00      0.99       441
        DDoS-HTTP_Flood       1.00      0.98      0.99        48
        DDoS-ICMP_Flood       1.00      1.00      1.00     10925
DDoS-ICMP_Fragmentation       0.99      1.00      1.00       726
      DDoS-PSHACK_Flood       1.00      1.00      1.00      6409
       DDoS-RSTFINFlood       1.00      1.00      1.00      6131
         DDoS-SYN_Flood       1.00      1.00      1.00      6173
         DDoS-SlowLoris       1.00      0.95      0.98        21
DDoS-SynonymousIP_Flood       1.00      1.00      1.00      5436
         DDoS-TCP_Flood       1.00      1.00      1.00      6982
         DDoS-UDP_Flood       1.00      1.00      1.00      8369
 DDoS-UDP_Fragmentation       0.99      0.98      0.99       473

              

**12 Classes, 5 features**

In [None]:
# Step 4: Hybrid Feature Selection
filter_selector = SelectKBest(score_func=f_classif, k=3)  # Select only top 5 features using filter method
X_new_filter = filter_selector.fit_transform(X, y)
filter_scores = filter_selector.scores_

# Wrapper Method: Recursive Feature Elimination (RFE) with Decision Tree
rfe_selector = RFE(estimator=DecisionTreeClassifier(random_state=42), n_features_to_select=2)  # Select 5 features
X_new_wrapper = rfe_selector.fit_transform(X, y)
rfe_ranking = rfe_selector.ranking_

# Combine results: Select top 5 features from both methods
top_features_filter = np.argsort(filter_scores)[-3:]  # Top 5 features from filter method
print('top_features_filter ', top_features_filter)
top_features_rfe = np.where(rfe_ranking == 1)[0]  # Features selected by RFE
print('top_features_rfe ', top_features_rfe)
top_features_combined = list(set(top_features_filter) | set(top_features_rfe))  # Combine both selections
print('top_features_combined ', top_features_combined)

# Step 5: Select top features from the dataset
X_selected = X.iloc[:, top_features_combined]

# Print the names of the selected features
print("Selected Features:", X_selected.columns.tolist())

  f = msb / msw


top_features_filter  [13 12 22]
top_features_rfe  [34 39]
top_features_combined  [34, 22, 39, 12, 13]
Selected Features: ['Min', 'Telnet', 'IAT', 'ece_flag_number', 'cwr_flag_number']


In [None]:
from sklearn.metrics import recall_score

# Step 6: Train and Test Split
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.3, random_state=42)

# Step 7: Train Decision Tree Classifier
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# Step 8: Evaluate the model
y_pred = dt_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')  # Weighted for imbalanced classes
recall = recall_score(y_test, y_pred, average='weighted')  # Weighted for imbalanced classes
f1 = f1_score(y_test, y_pred, average='weighted')  # Weighted for imbalanced classes

# Print metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("recall:", recall)
print("F1 Score:", f1)
print("Classification Report:\n", classification_report(y_test, y_pred))

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

Accuracy: 0.9994629224690221
Precision: 0.9994634930402503
recall: 0.9994629224690221
F1 Score: 0.9994622755188299
Classification Report:
                          precision    recall  f1-score   support

 DDoS-ACK_Fragmentation       0.99      1.00      0.99       441
        DDoS-HTTP_Flood       1.00      1.00      1.00        48
        DDoS-ICMP_Flood       1.00      1.00      1.00     10925
DDoS-ICMP_Fragmentation       0.99      1.00      0.99       726
      DDoS-PSHACK_Flood       1.00      1.00      1.00      6409
       DDoS-RSTFINFlood       1.00      1.00      1.00      6131
         DDoS-SYN_Flood       1.00      1.00      1.00      6173
         DDoS-SlowLoris       1.00      0.95      0.98        21
DDoS-SynonymousIP_Flood       1.00      1.00      1.00      5436
         DDoS-TCP_Flood       1.00      1.00      1.00      6982
         DDoS-UDP_Flood       1.00      1.00      1.00      8369
 DDoS-UDP_Fragmentation       1.00      0.98      0.99       473

              