In [13]:
pip install tensorflow tf-keras-vis matplotlib

Collecting tensorflow
  Downloading tensorflow-2.18.0-cp312-cp312-win_amd64.whl.metadata (3.3 kB)
Collecting tf-keras-vis
  Downloading tf_keras_vis-0.8.7-py3-none-any.whl.metadata (10 kB)
Collecting tensorflow-intel==2.18.0 (from tensorflow)
  Downloading tensorflow_intel-2.18.0-cp312-cp312-win_amd64.whl.metadata (4.9 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading google_


[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt

from sklearn.feature_selection import VarianceThreshold
from imblearn.over_sampling import SMOTE
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
import tensorflow as tf

In [2]:
# initial preprocessing

df = pd.read_csv("IoT_dataset.csv", low_memory=False)
df.head(5)
print(df['Attack_type'].value_counts())
from sklearn.utils import shuffle
drop_columns = ["frame.time", "ip.src_host", "ip.dst_host", "arp.src.proto_ipv4","arp.dst.proto_ipv4", 
         "http.file_data","http.request.full_uri","icmp.transmit_timestamp",
         "http.request.uri.query", "tcp.options","tcp.payload","tcp.srcport",
         "tcp.dstport", "udp.port", "mqtt.msg"]
df.drop(drop_columns, axis=1, inplace=True)
df.dropna(axis=0, how='any', inplace=True)
df.drop_duplicates(subset=None, keep="first", inplace=True)
df = shuffle(df)
df.isna().sum()
print(df['Attack_type'].value_counts())

Attack_type
Normal                   24301
DDoS_UDP                 14498
DDoS_ICMP                14090
Ransomware               10925
DDoS_HTTP                10561
SQL_injection            10311
Uploading                10269
DDoS_TCP                 10247
Backdoor                 10195
Vulnerability_scanner    10076
Port_Scanning            10071
XSS                      10052
Password                  9989
MITM                      1214
Fingerprinting            1001
Name: count, dtype: int64
Attack_type
Normal                   24101
DDoS_UDP                 14498
DDoS_ICMP                13096
DDoS_HTTP                10495
SQL_injection            10282
DDoS_TCP                 10247
Uploading                10214
Vulnerability_scanner    10062
Password                  9972
Backdoor                  9865
Ransomware                9689
XSS                       9543
Port_Scanning             8921
Fingerprinting             853
MITM                       358
Name: count, dtype: 

In [3]:
categorical_columns = df.select_dtypes(include=['object']).columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le
y = df['Attack_type']
X = df.drop(['Attack_type'], axis=1)

PCA reduces the CNN accuracy here.

In [8]:
# preprocessing for CNN
labels = df.pop('Attack_type') 

# Variance Threshold : Remove features with low variance
selector = VarianceThreshold(threshold=0.01)
df = selector.fit_transform(df)

# SMOTE for class balance
smote = SMOTE(random_state=42)
data_resampled, labels_resampled = smote.fit_resample(df, labels)
label_encoder = LabelEncoder()
labels_resampled = label_encoder.fit_transform(labels_resampled)
n_features = data_resampled.shape[1]
square_dim = int(np.ceil(np.sqrt(n_features)))

# Pad data to ensure square dimensions
if square_dim * square_dim > n_features:
    padding = np.zeros((data_resampled.shape[0], square_dim * square_dim - n_features))
    data_resampled = np.hstack((data_resampled, padding))
data_reshaped = data_resampled.reshape(data_resampled.shape[0], square_dim, square_dim, 1)

num_classes = len(np.unique(labels_resampled))
if num_classes > 2:
    labels_resampled = to_categorical(labels_resampled)

X_train, X_test, y_train, y_test = train_test_split(data_reshaped, labels_resampled, test_size=0.2, random_state=42, stratify=labels_resampled)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(X_train.shape[0], -1)).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(X_test.shape[0], -1)).reshape(X_test.shape)

Using MaxPooling2D in the early layers and Dropout in the later layers of a Convolutional Neural Network (CNN):

### MaxPooling2D in Early Layers
- **Dimensionality Reduction**: MaxPooling2D helps in reducing the spatial dimensions (width and height) of the input volume, which decreases the number of parameters and computations in the network, making it more efficient.
- **Translation Invariance**: Provides a form of translation invariance, meaning the network becomes less sensitive to the exact position of features in the input image.
- **Feature Extraction**: By focusing on the most prominent features, MaxPooling2D helps in extracting the most important features from the input data, which can be useful for the subsequent layers.

### Dropout in Later Layers
- **Prevent Overfitting**: Dropout is a regularization technique that helps in preventing overfitting by randomly setting a fraction of input units to 0 at each update during training time. This forces the network to learn more robust features that are useful in conjunction with many different random subsets of the other neurons.
- **Improved Generalization**: By preventing overfitting, Dropout helps in improving the generalization of the model to new, unseen data.

In [9]:
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(square_dim, square_dim, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes if num_classes > 2 else 1, activation='softmax' if num_classes > 2 else 'sigmoid')
])

# model = Sequential([
#     Conv2D(32, kernel_size=(3, 3), activation='sigmoid', input_shape=(square_dim, square_dim, 1)),
#     Dropout(0.25),
#     Conv2D(64, kernel_size=(3, 3), activation='relu'),
#     Dropout(0.25),
#     Flatten(),
#     Dense(128, activation='relu'),
#     Dropout(0.5),
#     Dense(num_classes if num_classes > 2 else 1, activation='softmax' if num_classes > 2 else 'sigmoid')
# ])

loss_function = 'categorical_crossentropy' if num_classes > 2 else 'binary_crossentropy'
model.compile(optimizer='adam', loss=loss_function, metrics=['accuracy'])
print("Training the model...")
history = model.fit(X_train, y_train, batch_size=32, epochs=10, validation_split=0.2, verbose=1)

model_loss, model_accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f"Test Accuracy: {model_accuracy * 100:.2f}%")

Training the model...
Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7291/7291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 3ms/step - accuracy: 0.8800 - loss: 0.3767 - val_accuracy: 0.9913 - val_loss: 0.0230
Epoch 2/10
[1m7291/7291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 3ms/step - accuracy: 0.9869 - loss: 0.0365 - val_accuracy: 0.9928 - val_loss: 0.0178
Epoch 3/10
[1m7291/7291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 3ms/step - accuracy: 0.9911 - loss: 0.0267 - val_accuracy: 0.9951 - val_loss: 0.0120
Epoch 4/10
[1m7291/7291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - accuracy: 0.9924 - loss: 0.0205 - val_accuracy: 0.9944 - val_loss: 0.0129
Epoch 5/10
[1m7291/7291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 3ms/step - accuracy: 0.9931 - loss: 0.0187 - val_accuracy: 0.9951 - val_loss: 0.0103
Epoch 6/10
[1m7291/7291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 3ms/step - accuracy: 0.9937 - loss: 0.0162 - val_accuracy: 0.9957 - val_loss: 0.0101
Epoch 7/10
[1m7291/7

In [11]:
from tf_keras_vis.gradcam import Gradcam
from tensorflow.keras.models import Model
from tf_keras_vis.utils.model_modifiers import ReplaceToLinear
from tf_keras_vis.utils.scores import CategoricalScore
from tf_keras_vis.activation_maximization import ActivationMaximization

model = Model(inputs=model.input, outputs=model.output, name="wrapped_model")
gradcam = Gradcam(model,
                  model_modifier=ReplaceToLinear(),
                  clone=True)

# Define the input data you want to interpret
# This should be a sample that has been preprocessed similarly to your training data
test_input = np.expand_dims(X[0], axis=0)  # Expand dims if needed for batch dimension

# Apply DeepLIFT using tf-keras-vis Gradcam for visualization
score = CategoricalScore([1])  # Target the positive class (adjust index if needed)
cam = gradcam(score, test_input, penultimate_layer=-1)  # Penultimate layer to visualize relevance

# Plot the DeepLIFT relevance heatmap
heatmap = np.uint8(255 * cam[0])
plt.imshow(heatmap, cmap='viridis')
plt.colorbar()
plt.show()