In [1]:
import pandas as pd
df_cleaned_3 = pd.read_csv('/kaggle/input/cicids2017/df_cleaned_3.csv')
df_cleaned_4 = pd.read_csv('/kaggle/input/cicids2017/df_cleaned_4.csv')

In [2]:
# Separate features (X) and labels (y)
X = df_cleaned_4.values  # Features
y = df_cleaned_3['Encoded_Label'].values  # Encoded labels

# # Normalize the features
# from sklearn.preprocessing import StandardScaler
# scaler = StandardScaler()
# X = scaler.fit_transform(X)
# Split the data into training and testing sets

from sklearn.model_selection import train_test_split

# Initial split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Further split the training set into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)


In [3]:
import numpy as np

# Check for extremely large values
print("Max value in X_train:", np.max(X_train))
print("Min value in X_train:", np.min(X_train))


Max value in X_train: 2071000000.0
Min value in X_train: -32212234632.0


In [4]:
# Apply SMOTE to the training set
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter

scaler = StandardScaler()
smote = SMOTE(random_state=42)





In [5]:
import tensorflow as tf
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam


fine_tuned_model = load_model('/kaggle/input/model-saved/checkpoint_model_gru-6.keras')
fine_tuned_model.summary()

In [6]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Path to save the model
checkpoint_filepath = '/kaggle/working/checkpoint_model_gru.keras'
#checkpoint_filepath_weight = '/kaggle/working/checkpoint_model_gru.weights.h5'
# checkpoint_filepath_f1 = '/kaggle/working/checkpoint_model_f1_gru.keras'
# checkpoint_filepath_roc_auc = '/kaggle/working/checkpoint_model_roc_auc_gru.keras'

# Configure ModelCheckpoint to save the full model
model_checkpoint = ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=False,  # Set to False to save the full model
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,  # Only save when 'val_accuracy' improves
    verbose=1
)

# model_checkpoint_weight = ModelCheckpoint(
#     filepath=checkpoint_filepath_weight,
#     save_weights_only=True,  # Set to False to save the full model
#     monitor='val_accuracy',
#     mode='max',
#     save_best_only=True,  # Only save when 'val_accuracy' improves
#     verbose=1
# )

# Checkpoint to save the best model based on the highest validation F1 score
# checkpoint_f1 = ModelCheckpoint(
#     filepath= checkpoint_filepath_f1,
#     save_weights_only=False,  # Set to False to save the full model
#     monitor='val_f1_score',
#     mode='max',
#     save_best_only=True,
#     verbose=1
# )

# # Checkpoint to save the best model based on the highest validation ROC-AUC score
# checkpoint_roc_auc = ModelCheckpoint(
#     filepath= checkpoint_filepath_roc_auc,
#     save_weights_only=False,  # Set to False to save the full model
#     monitor='val_roc_auc',
#     mode='max',
#     save_best_only=True,
#     verbose=1
# )

In [7]:


X_train_scaled = scaler.fit_transform(X_train)

# Get the original class distribution
class_counts = Counter(y_train)
print("Original class distribution:", class_counts)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size = class_counts[min(class_counts, key=class_counts.get)]
desired_majority_size = minority_class_size * 5

# Create the sampling strategy dictionary
sampling_strategy = {0: desired_majority_size, 1: minority_class_size}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler = RandomUnderSampler(sampling_strategy=sampling_strategy, random_state=42)
X_resampled, y_resampled = undersampler.fit_resample(X_train, y_train)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled))


Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})


In [8]:
# Apply SMOTE on the smaller subset

X_train_resampled, y_train_resampled = smote.fit_resample(X_resampled, y_resampled)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled))
print("Class Distribution After SMOTE:", Counter(y_train_resampled))

Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [9]:
X_test = X_test.reshape(X_test.shape[0], 1, 56)  # Ensure shape matches training data

# Evaluate the model
test_loss, test_accuracy = fine_tuned_model.evaluate(X_test, y_test, verbose=2)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

23633/23633 - 36s - 2ms/step - accuracy: 0.6542 - loss: 1.8179
Test Loss: 1.8179376125335693
Test Accuracy: 0.6541851758956909


In [11]:
import numpy as np
from sklearn.metrics import f1_score, classification_report

# Get model predictions (probability distributions)
y_pred_probs = fine_tuned_model.predict(X_test)

# Convert predicted probabilities to class labels
y_pred_labels = np.argmax(y_pred_probs, axis=1)

# Convert one-hot encoded y_test to class labels (if necessary)
if len(y_test.shape) > 1 and y_test.shape[1] > 1:
    y_test_labels = np.argmax(y_test, axis=1)  # Convert from one-hot to categorical labels
else:
    y_test_labels = y_test  # Already categorical

# Compute F1 Score
f1_weighted = f1_score(y_test_labels, y_pred_labels, average="weighted")
f1_macro = f1_score(y_test_labels, y_pred_labels, average="macro")

# Print F1 Scores
print("Weighted F1 Score:", f1_weighted)
print("Macro F1 Score:", f1_macro)

# Print full classification report
print(classification_report(y_test_labels, y_pred_labels))


[1m23633/23633[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 2ms/step
Weighted F1 Score: 0.7305988329775235
Macro F1 Score: 0.23246469764482697
              precision    recall  f1-score   support

           0       0.99      0.62      0.76    628518
           1       0.02      0.74      0.03       584
           2       0.47      0.85      0.60     38404
           3       0.14      0.77      0.24      3086
           4       0.56      0.80      0.66     51854
           5       0.05      0.66      0.09      1568
           6       0.06      0.82      0.11      1616
           7       0.19      0.68      0.29      1779
           8       0.00      0.67      0.00         3
           9       0.00      0.36      0.00        11
          10       0.39      0.92      0.55     27208
          11       0.06      0.85      0.11       966
          12       0.01      0.04      0.01       441
          13       0.00      1.00      0.00         6
          14       0.01      0.95  

In [9]:
# import tensorflow as tf
# from tensorflow.keras.models import load_model, Model
# from tensorflow.keras.layers import Dense, Input
# from tensorflow.keras.optimizers import Adam
# from tensorflow.keras.layers import Flatten

# # Load the pre-trained model
# pretrained_model = load_model("/kaggle/input/unsw-nb15-gru-model-h5/unsw_nb15_gru_model.h5")

# # Freeze all layers except the last layer
# for layer in pretrained_model.layers[:-1]:
#     layer.trainable = False

# # Check the model summary before modification
# print("Pre-trained model summary:")
# pretrained_model.summary()

# # Number of classes in CICIDS2017 dataset
# num_classes_cicids2017 = 15

# # Assuming the original input shape was (time_steps, features) -> (1, 56)
# input_layer = Input(shape=(1, 56))  # Add an explicit time_steps dimension

# # Pass input through the model except the final Dense layer
# x = input_layer
# for layer in pretrained_model.layers[:-1]:  # Exclude the last output layer
#     x = layer(x)

# # Flatten the output before the new Dense layer
# x = Flatten()(x)

# # Add a new output layer
# output_layer = Dense(num_classes_cicids2017, activation="softmax")(x)

# # Create the fine-tuned model
# fine_tuned_model = Model(inputs=input_layer, outputs=output_layer)

# # Compile the fine-tuned model
# fine_tuned_model.compile(
#     optimizer=Adam(learning_rate=1e-4),
#     loss="sparse_categorical_crossentropy",
#     metrics=["accuracy"]
# )

# # Print model summary
# fine_tuned_model.summary()


In [11]:
# # Reshape the training and validation data to (samples, time_steps, features)
# X_train_resampled = X_train_resampled.reshape(X_train_resampled.shape[0], 1, 56)
# X_val = X_val.reshape(X_val.shape[0], 1, 56)



# # Train the fine-tuned model
# history = fine_tuned_model.fit(
#     X_train_resampled,  # Features from CICIDS2017
#     y_train_resampled,  # Labels from CICIDS2017
#     validation_data=(X_val, y_val),  # Validation set
#     epochs=50,  # Adjust based on the dataset size
#     batch_size=64,  # Adjust batch size as needed
#     verbose=2,
#     callbacks=[model_checkpoint, model_checkpoint_weight]
# )

# Epoch 50/50
# 22687/22687 - 48s - 2ms/step - accuracy: 0.7439 - loss: 0.7466 - val_accuracy: 0.5533 - val_loss: 2.0748


In [12]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled = X_train_resampled.reshape(X_train_resampled.shape[0], 1, 56)
X_val = X_val.reshape(X_val.shape[0], 1, 56)


# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled,  # Features from CICIDS2017
    y_train_resampled,  # Labels from CICIDS2017
    validation_data=(X_val, y_val),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint, model_checkpoint_weight]
)


Epoch 1/500

Epoch 1: val_accuracy improved from -inf to 0.55917, saving model to /kaggle/working/checkpoint_model_gru.keras

Epoch 1: val_accuracy improved from -inf to 0.55917, saving model to /kaggle/working/checkpoint_model_gru.weights.h5
726/726 - 9s - 12ms/step - accuracy: 0.7275 - loss: 0.8216 - val_accuracy: 0.5592 - val_loss: 2.3696
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.55917

Epoch 2: val_accuracy did not improve from 0.55917
726/726 - 5s - 7ms/step - accuracy: 0.7270 - loss: 0.8002 - val_accuracy: 0.5539 - val_loss: 2.3666
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.55917

Epoch 3: val_accuracy did not improve from 0.55917
726/726 - 5s - 7ms/step - accuracy: 0.7293 - loss: 0.7902 - val_accuracy: 0.5483 - val_loss: 2.3622
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.55917

Epoch 4: val_accuracy did not improve from 0.55917
726/726 - 5s - 7ms/step - accuracy: 0.7293 - loss: 0.7829 - val_accuracy: 0.5484 - val_loss: 2.3536
Epoch 5/

In [13]:
# Initial split into training and test sets
X_train_1, X_test_1, y_train_1, y_test_1 = train_test_split(
    X, y, test_size=0.3, random_state=43, stratify=y
)

# Further split the training set into training and validation sets
X_train_1, X_val_1, y_train_1, y_val_1 = train_test_split(
    X_train_1, y_train_1, test_size=0.2, random_state=43, stratify=y_train_1
)
# Check for extremely large values
print("Max value in X_train_1:", np.max(X_train_1))
print("Min value in X_train_1:", np.min(X_train_1))

X_train_1_scaled = scaler.fit_transform(X_train_1)

# Get the original class distribution
class_counts_1 = Counter(y_train_1)
print("Original class distribution:", class_counts_1)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_1 = class_counts_1[min(class_counts_1, key=class_counts_1.get)]
desired_majority_size_1 = minority_class_size_1 * 5

# Create the sampling strategy dictionary
sampling_strategy_1 = {0: desired_majority_size_1, 1: minority_class_size_1}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_1 = RandomUnderSampler(sampling_strategy=sampling_strategy_1, random_state=42)
X_resampled_1, y_resampled_1 = undersampler_1.fit_resample(X_train_1, y_train_1)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_1))

# Apply SMOTE on the smaller subset
X_train_resampled_1, y_train_resampled_1 = smote.fit_resample(X_resampled_1, y_resampled_1)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_1))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_1))


Max value in X_train_1: 2071000000.0
Min value in X_train_1: -32212234632.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [14]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_1 = X_train_resampled_1.reshape(X_train_resampled_1.shape[0], 1, 56)
X_val_1 = X_val_1.reshape(X_val_1.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_1,  # Features from CICIDS2017
    y_train_resampled_1,  # Labels from CICIDS2017
    validation_data=(X_val_1, y_val_1),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint, model_checkpoint_weight]
)

Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.58351

Epoch 1: val_accuracy did not improve from 0.58351
726/726 - 5s - 7ms/step - accuracy: 0.7074 - loss: 1.0860 - val_accuracy: 0.5588 - val_loss: 2.4702
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.58351

Epoch 2: val_accuracy did not improve from 0.58351
726/726 - 5s - 7ms/step - accuracy: 0.7024 - loss: 1.0504 - val_accuracy: 0.5571 - val_loss: 2.4931
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.58351

Epoch 3: val_accuracy did not improve from 0.58351
726/726 - 6s - 8ms/step - accuracy: 0.6977 - loss: 1.0325 - val_accuracy: 0.5575 - val_loss: 2.5119
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.58351

Epoch 4: val_accuracy did not improve from 0.58351
726/726 - 6s - 8ms/step - accuracy: 0.7009 - loss: 1.0193 - val_accuracy: 0.5576 - val_loss: 2.5213
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.58351

Epoch 5: val_accuracy did not improve from 0.58351
726/726 - 5s - 7ms/s

In [15]:
# Initial split into training and test sets
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(
    X, y, test_size=0.3, random_state=44, stratify=y
)

# Further split the training set into training and validation sets
X_train_2, X_val_2, y_train_2, y_val_2 = train_test_split(
    X_train_2, y_train_2, test_size=0.2, random_state=44, stratify=y_train_2
)

# Check for extremely large values
print("Max value in X_train_2:", np.max(X_train_2))
print("Min value in X_train_2:", np.min(X_train_2))

X_train_2_scaled = scaler.fit_transform(X_train_2)
# Get the original class distribution
class_counts_2 = Counter(y_train_2)
print("Original class distribution:", class_counts_2)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_2 = class_counts_2[min(class_counts_2, key=class_counts_2.get)]
desired_majority_size_2 = minority_class_size_2 * 5

# Create the sampling strategy dictionary
sampling_strategy_2 = {0: desired_majority_size_2, 1: minority_class_size_2}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_2 = RandomUnderSampler(sampling_strategy=sampling_strategy_2, random_state=42)
X_resampled_2, y_resampled_2 = undersampler_2.fit_resample(X_train_2, y_train_2)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_2))

# Apply SMOTE on the smaller subset
X_train_resampled_2, y_train_resampled_2 = smote.fit_resample(X_resampled_2, y_resampled_2)

#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_2))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_2))

Max value in X_train_2: 2071000000.0
Min value in X_train_2: -9663668122.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [16]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_2 = X_train_resampled_2.reshape(X_train_resampled_2.shape[0], 1, 56)
X_val_2 = X_val_2.reshape(X_val_2.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_2,  # Features from CICIDS2017
    y_train_resampled_2,  # Labels from CICIDS2017
    validation_data=(X_val_2, y_val_2),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint, model_checkpoint_weight]
)

Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.59692

Epoch 1: val_accuracy did not improve from 0.59692
726/726 - 5s - 7ms/step - accuracy: 0.7389 - loss: 0.8497 - val_accuracy: 0.5822 - val_loss: 2.4343
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.59692

Epoch 2: val_accuracy did not improve from 0.59692
726/726 - 5s - 7ms/step - accuracy: 0.7374 - loss: 0.8450 - val_accuracy: 0.5828 - val_loss: 2.4092
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.59692

Epoch 3: val_accuracy did not improve from 0.59692
726/726 - 5s - 7ms/step - accuracy: 0.7378 - loss: 0.8417 - val_accuracy: 0.5828 - val_loss: 2.3890
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.59692

Epoch 4: val_accuracy did not improve from 0.59692
726/726 - 6s - 8ms/step - accuracy: 0.7377 - loss: 0.8387 - val_accuracy: 0.5818 - val_loss: 2.3741
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.59692

Epoch 5: val_accuracy did not improve from 0.59692
726/726 - 5s - 7ms/s

In [17]:
# Initial split into training and test sets
X_train_3, X_test_3, y_train_3, y_test_3 = train_test_split(
    X, y, test_size=0.3, random_state=45, stratify=y
)

# Further split the training set into training and validation sets
X_train_3, X_val_3, y_train_3, y_val_3 = train_test_split(
    X_train_3, y_train_3, test_size=0.3, random_state=44, stratify=y_train_3
)

# Check for extremely large values
print("Max value in X_train_3:", np.max(X_train_3))
print("Min value in X_train_3:", np.min(X_train_3))

X_train_3_scaled = scaler.fit_transform(X_train_3)

# Get the original class distribution
class_counts_3 = Counter(y_train_3)
print("Original class distribution:", class_counts_3)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_3 = class_counts_3[min(class_counts_3, key=class_counts_3.get)]
desired_majority_size_3 = minority_class_size_3 * 5

# Create the sampling strategy dictionary
sampling_strategy_3 = {0: desired_majority_size_3, 1: minority_class_size_3}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_3 = RandomUnderSampler(sampling_strategy=sampling_strategy_3, random_state=42)
X_resampled_3, y_resampled_3 = undersampler_3.fit_resample(X_train_3, y_train_3)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_3))

# Apply SMOTE on the smaller subset
X_train_resampled_3, y_train_resampled_3 = smote.fit_resample(X_resampled_3, y_resampled_3)

#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_3))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_3))



Max value in X_train_3: 2071000000.0
Min value in X_train_3: -9663668122.0
Original class distribution: Counter({0: 1026577, 4: 84694, 2: 62727, 10: 44440, 3: 5040, 7: 2906, 6: 2638, 5: 2562, 11: 1577, 1: 955, 12: 720, 14: 319, 9: 18, 13: 11, 8: 6})
Class distribution after undersampling: Counter({4: 84694, 2: 62727, 10: 44440, 3: 5040, 7: 2906, 6: 2638, 5: 2562, 11: 1577, 12: 720, 14: 319, 0: 30, 9: 18, 13: 11, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 84694, 2: 62727, 10: 44440, 3: 5040, 7: 2906, 6: 2638, 5: 2562, 11: 1577, 12: 720, 14: 319, 0: 30, 9: 18, 13: 11, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 84694, 1: 84694, 2: 84694, 3: 84694, 4: 84694, 5: 84694, 6: 84694, 7: 84694, 8: 84694, 9: 84694, 10: 84694, 11: 84694, 12: 84694, 13: 84694, 14: 84694})


In [18]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_3 = X_train_resampled_3.reshape(X_train_resampled_3.shape[0], 1, 56)
X_val_3 = X_val_3.reshape(X_val_3.shape[0], 1, 56)


# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_3,  # Features from CICIDS2017
    y_train_resampled_3,  # Labels from CICIDS2017
    validation_data=(X_val_3, y_val_3),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint, model_checkpoint_weight]
)

Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.59692

Epoch 1: val_accuracy did not improve from 0.59692
636/636 - 6s - 9ms/step - accuracy: 0.7519 - loss: 0.7427 - val_accuracy: 0.5423 - val_loss: 1.9237
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.59692

Epoch 2: val_accuracy did not improve from 0.59692
636/636 - 5s - 8ms/step - accuracy: 0.7558 - loss: 0.7253 - val_accuracy: 0.5477 - val_loss: 1.8958
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.59692

Epoch 3: val_accuracy did not improve from 0.59692
636/636 - 5s - 8ms/step - accuracy: 0.7562 - loss: 0.7181 - val_accuracy: 0.5511 - val_loss: 1.8837
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.59692

Epoch 4: val_accuracy did not improve from 0.59692
636/636 - 5s - 8ms/step - accuracy: 0.7634 - loss: 0.7127 - val_accuracy: 0.5560 - val_loss: 1.8712
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.59692

Epoch 5: val_accuracy did not improve from 0.59692
636/636 - 5s - 8ms/s

In [19]:
# Initial split into training and test sets
X_train_4, X_test_4, y_train_4, y_test_4 = train_test_split(
    X, y, test_size=0.3, random_state=46, stratify=y
)

# Further split the training set into training and validation sets
X_train_4, X_val_4, y_train_4, y_val_4 = train_test_split(
    X_train_4, y_train_4, test_size=0.2, random_state=46, stratify=y_train_4
)

# Check for extremely large values
print("Max value in X_train_4:", np.max(X_train_4))
print("Min value in X_train_4:", np.min(X_train_4))

X_train_4_scaled = scaler.fit_transform(X_train_4)

# Get the original class distribution
class_counts_4 = Counter(y_train_4)
print("Original class distribution:", class_counts_4)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_4 = class_counts_4[min(class_counts_4, key=class_counts_4.get)]
desired_majority_size_4 = minority_class_size_4 * 5

# Create the sampling strategy dictionary
sampling_strategy_4 = {0: desired_majority_size_4, 1: minority_class_size_4}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_4 = RandomUnderSampler(sampling_strategy=sampling_strategy_4, random_state=42)
X_resampled_4, y_resampled_4 = undersampler_4.fit_resample(X_train_4, y_train_4)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_4))

# Apply SMOTE on the smaller subset
X_train_resampled_4, y_train_resampled_4 = smote.fit_resample(X_resampled_4, y_resampled_4)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_4))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_4))



Max value in X_train_4: 2071000000.0
Min value in X_train_4: -9663668122.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [20]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_4 = X_train_resampled_4.reshape(X_train_resampled_4.shape[0], 1, 56)
X_val_4 = X_val_4.reshape(X_val_4.shape[0], 1, 56)


# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_4,  # Features from CICIDS2017
    y_train_resampled_4,  # Labels from CICIDS2017
    validation_data=(X_val_4, y_val_4),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint, model_checkpoint_weight]
)

Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.59692

Epoch 1: val_accuracy did not improve from 0.59692
726/726 - 6s - 8ms/step - accuracy: 0.7171 - loss: 1.3431 - val_accuracy: 0.5878 - val_loss: 2.1890
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.59692

Epoch 2: val_accuracy did not improve from 0.59692
726/726 - 5s - 7ms/step - accuracy: 0.7170 - loss: 1.2689 - val_accuracy: 0.5876 - val_loss: 2.1846
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.59692

Epoch 3: val_accuracy did not improve from 0.59692
726/726 - 5s - 7ms/step - accuracy: 0.7178 - loss: 1.2161 - val_accuracy: 0.5870 - val_loss: 2.1915
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.59692

Epoch 4: val_accuracy did not improve from 0.59692
726/726 - 5s - 7ms/step - accuracy: 0.7185 - loss: 1.1697 - val_accuracy: 0.5837 - val_loss: 2.2023
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.59692

Epoch 5: val_accuracy did not improve from 0.59692
726/726 - 5s - 7ms/s

In [21]:
# Initial split into training and test sets
X_train_5, X_test_5, y_train_5, y_test_5 = train_test_split(
    X, y, test_size=0.3, random_state=47, stratify=y
)

# Further split the training set into training and validation sets
X_train_5, X_val_5, y_train_5, y_val_5 = train_test_split(
    X_train_5, y_train_5, test_size=0.2, random_state=47, stratify=y_train_5
)

# Check for extremely large values
print("Max value in X_train_5:", np.max(X_train_5))
print("Min value in X_train_5:", np.min(X_train_5))

X_train_5_scaled = scaler.fit_transform(X_train_5)

# Get the original class distribution
class_counts_5 = Counter(y_train_5)
print("Original class distribution:", class_counts_5)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_5 = class_counts_5[min(class_counts_5, key=class_counts_5.get)]
desired_majority_size_5 = minority_class_size_5 * 5

# Create the sampling strategy dictionary
sampling_strategy_5 = {0: desired_majority_size_5, 1: minority_class_size_5}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_5 = RandomUnderSampler(sampling_strategy=sampling_strategy_5, random_state=42)
X_resampled_5, y_resampled_5 = undersampler_5.fit_resample(X_train_5, y_train_5)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_5))

# Apply SMOTE on the smaller subset
X_train_resampled_5, y_train_resampled_5 = smote.fit_resample(X_resampled_5, y_resampled_5)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_5))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_5))

Max value in X_train_5: 2071000000.0
Min value in X_train_5: -32212234632.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [22]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_5 = X_train_resampled_5.reshape(X_train_resampled_5.shape[0], 1, 56)
X_val_5 = X_val_5.reshape(X_val_5.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_5,  # Features from CICIDS2017
    y_train_resampled_5,  # Labels from CICIDS2017
    validation_data=(X_val_5, y_val_5),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint, model_checkpoint_weight]
)

Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.59840

Epoch 1: val_accuracy did not improve from 0.59840
726/726 - 5s - 7ms/step - accuracy: 0.7248 - loss: 0.7351 - val_accuracy: 0.5459 - val_loss: 2.0978
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.59840

Epoch 2: val_accuracy did not improve from 0.59840
726/726 - 6s - 8ms/step - accuracy: 0.7244 - loss: 0.7286 - val_accuracy: 0.5472 - val_loss: 2.1008
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.59840

Epoch 3: val_accuracy did not improve from 0.59840
726/726 - 5s - 7ms/step - accuracy: 0.7240 - loss: 0.7258 - val_accuracy: 0.5479 - val_loss: 2.0984
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.59840

Epoch 4: val_accuracy did not improve from 0.59840
726/726 - 5s - 7ms/step - accuracy: 0.7251 - loss: 0.7235 - val_accuracy: 0.5479 - val_loss: 2.0997
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.59840

Epoch 5: val_accuracy did not improve from 0.59840
726/726 - 5s - 7ms/s

In [23]:
# Initial split into training and test sets
X_train_6, X_test_6, y_train_6, y_test_6 = train_test_split(
    X, y, test_size=0.3, random_state=48, stratify=y
)

# Further split the training set into training and validation sets
X_train_6, X_val_6, y_train_6, y_val_6 = train_test_split(
    X_train_6, y_train_6, test_size=0.2, random_state=48, stratify=y_train_6
)

# Check for extremely large values
print("Max value in X_train_6:", np.max(X_train_6))
print("Min value in X_train_6:", np.min(X_train_6))

X_train_6_scaled = scaler.fit_transform(X_train_6)

# Get the original class distribution
class_counts_6 = Counter(y_train_6)
print("Original class distribution:", class_counts_6)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_6 = class_counts_6[min(class_counts_6, key=class_counts_6.get)]
desired_majority_size_6 = minority_class_size_6 * 5

# Create the sampling strategy dictionary
sampling_strategy_6 = {0: desired_majority_size_6, 1: minority_class_size_6}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_6 = RandomUnderSampler(sampling_strategy=sampling_strategy_6, random_state=42)
X_resampled_6, y_resampled_6 = undersampler_6.fit_resample(X_train_6, y_train_6)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_6))

# Apply SMOTE on the smaller subset
X_train_resampled_6, y_train_resampled_6 = smote.fit_resample(X_resampled_6, y_resampled_6)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_6))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_6))

Max value in X_train_6: 2071000000.0
Min value in X_train_6: -9663668122.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [24]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_6 = X_train_resampled_6.reshape(X_train_resampled_6.shape[0], 1, 56)
X_val_6 = X_val_6.reshape(X_val_6.shape[0], 1, 56)


# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_6,  # Features from CICIDS2017
    y_train_resampled_6,  # Labels from CICIDS2017
    validation_data=(X_val_6, y_val_6),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint, model_checkpoint_weight]
)

Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.59840

Epoch 1: val_accuracy did not improve from 0.59840
726/726 - 6s - 8ms/step - accuracy: 0.7313 - loss: 0.8944 - val_accuracy: 0.5703 - val_loss: 2.1691
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.59840

Epoch 2: val_accuracy did not improve from 0.59840
726/726 - 5s - 7ms/step - accuracy: 0.7304 - loss: 0.8819 - val_accuracy: 0.5715 - val_loss: 2.1355
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.59840

Epoch 3: val_accuracy did not improve from 0.59840
726/726 - 5s - 7ms/step - accuracy: 0.7251 - loss: 0.8757 - val_accuracy: 0.5715 - val_loss: 2.1149
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.59840

Epoch 4: val_accuracy did not improve from 0.59840
726/726 - 6s - 8ms/step - accuracy: 0.7242 - loss: 0.8711 - val_accuracy: 0.5805 - val_loss: 2.1084
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.59840

Epoch 5: val_accuracy did not improve from 0.59840
726/726 - 6s - 8ms/s

In [25]:
# Initial split into training and test sets
X_train_7, X_test_7, y_train_7, y_test_7 = train_test_split(
    X, y, test_size=0.3, random_state=49, stratify=y
)

# Further split the training set into training and validation sets
X_train_7, X_val_7, y_train_7, y_val_7 = train_test_split(
    X_train_7, y_train_7, test_size=0.2, random_state=49, stratify=y_train_7
)

# Check for extremely large values
print("Max value in X_train_7:", np.max(X_train_7))
print("Min value in X_train_7:", np.min(X_train_7))

scaler = StandardScaler()
X_train_7_scaled = scaler.fit_transform(X_train_7)

# Get the original class distribution
class_counts_7 = Counter(y_train_7)
print("Original class distribution:", class_counts_7)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_7 = class_counts_7[min(class_counts_7, key=class_counts_7.get)]
desired_majority_size_7 = minority_class_size_7 * 5

# Create the sampling strategy dictionary
sampling_strategy_7 = {0: desired_majority_size_7, 1: minority_class_size_7}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_7 = RandomUnderSampler(sampling_strategy=sampling_strategy_7, random_state=42)
X_resampled_7, y_resampled_7 = undersampler_7.fit_resample(X_train_7, y_train_7)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_7))

# Apply SMOTE on the smaller subset
smote = SMOTE(random_state=42)
X_train_resampled_7, y_train_resampled_7 = smote.fit_resample(X_resampled_7, y_resampled_7)

#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_7))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_7))




Max value in X_train_7: 2071000000.0
Min value in X_train_7: -32212234632.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [26]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_7 = X_train_resampled_7.reshape(X_train_resampled_7.shape[0], 1, 56)
X_val_7 = X_val_7.reshape(X_val_7.shape[0], 1, 56)


# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_7,  # Features from CICIDS2017
    y_train_resampled_7,  # Labels from CICIDS2017
    validation_data=(X_val_7, y_val_7),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint, model_checkpoint_weight]
)

Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.59840

Epoch 1: val_accuracy did not improve from 0.59840
726/726 - 5s - 7ms/step - accuracy: 0.7236 - loss: 0.8165 - val_accuracy: 0.5481 - val_loss: 1.9463
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.59840

Epoch 2: val_accuracy did not improve from 0.59840
726/726 - 5s - 7ms/step - accuracy: 0.7373 - loss: 0.7947 - val_accuracy: 0.4152 - val_loss: 1.9184
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.59840

Epoch 3: val_accuracy did not improve from 0.59840
726/726 - 5s - 7ms/step - accuracy: 0.7369 - loss: 0.7869 - val_accuracy: 0.4146 - val_loss: 1.8993
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.59840

Epoch 4: val_accuracy did not improve from 0.59840
726/726 - 5s - 7ms/step - accuracy: 0.7378 - loss: 0.7827 - val_accuracy: 0.4146 - val_loss: 1.8825
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.59840

Epoch 5: val_accuracy did not improve from 0.59840
726/726 - 6s - 8ms/s

In [27]:
# Initial split into training and test sets
X_train_8, X_test_8, y_train_8, y_test_8 = train_test_split(
    X, y, test_size=0.3, random_state=50, stratify=y
)

# Further split the training set into training and validation sets
X_train_8, X_val_8, y_train_8, y_val_8 = train_test_split(
    X_train_8, y_train_8, test_size=0.2, random_state=50, stratify=y_train_8
)

# Check for extremely large values
print("Max value in X_train_8:", np.max(X_train_8))
print("Min value in X_train_8:", np.min(X_train_8))

X_train_8_scaled = scaler.fit_transform(X_train_8)

# Get the original class distribution
class_counts_8 = Counter(y_train_8)
print("Original class distribution:", class_counts_8)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_8 = class_counts_8[min(class_counts_8, key=class_counts_8.get)]
desired_majority_size_8 = minority_class_size_8 * 5

# Create the sampling strategy dictionary
sampling_strategy_8 = {0: desired_majority_size_8, 1: minority_class_size_8}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_8 = RandomUnderSampler(sampling_strategy=sampling_strategy_8, random_state=42)
X_resampled_8, y_resampled_8 = undersampler_8.fit_resample(X_train_8, y_train_8)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_8))

# Apply SMOTE on the smaller subset
X_train_resampled_8, y_train_resampled_8 = smote.fit_resample(X_resampled_8, y_resampled_8)

#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_8))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_8))

Max value in X_train_8: 2071000000.0
Min value in X_train_8: -9663668122.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [28]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_8 = X_train_resampled_8.reshape(X_train_resampled_8.shape[0], 1, 56)
X_val_8 = X_val_8.reshape(X_val_8.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_8,  # Features from CICIDS2017
    y_train_resampled_8,  # Labels from CICIDS2017
    validation_data=(X_val_8, y_val_8),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint]
)

Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.59840
726/726 - 5s - 8ms/step - accuracy: 0.7207 - loss: 0.8362 - val_accuracy: 0.5647 - val_loss: 1.7327
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.59840
726/726 - 5s - 7ms/step - accuracy: 0.7237 - loss: 0.8246 - val_accuracy: 0.5666 - val_loss: 1.7143
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.59840
726/726 - 5s - 7ms/step - accuracy: 0.7228 - loss: 0.8191 - val_accuracy: 0.5687 - val_loss: 1.7042
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.59840
726/726 - 6s - 8ms/step - accuracy: 0.7219 - loss: 0.8150 - val_accuracy: 0.5659 - val_loss: 1.7007
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.59840
726/726 - 5s - 7ms/step - accuracy: 0.7203 - loss: 0.8115 - val_accuracy: 0.5549 - val_loss: 1.6945
Epoch 6/500

Epoch 6: val_accuracy did not improve from 0.59840
726/726 - 5s - 7ms/step - accuracy: 0.7200 - loss: 0.8084 - val_accuracy: 0.5676 - val_loss: 1.6953
Epoch 7/500

Epo

In [29]:
# Initial split into training and test sets
X_train_9, X_test_9, y_train_9, y_test_9 = train_test_split(
    X, y, test_size=0.3, random_state=51, stratify=y
)

# Further split the training set into training and validation sets
X_train_9, X_val_9, y_train_9, y_val_9 = train_test_split(
    X_train_9, y_train_9, test_size=0.2, random_state=51, stratify=y_train_9
)

# Check for extremely large values
print("Max value in X_train_9:", np.max(X_train_9))
print("Min value in X_train_9:", np.min(X_train_9))

X_train_9_scaled = scaler.fit_transform(X_train_9)

# Get the original class distribution
class_counts_9 = Counter(y_train_9)
print("Original class distribution:", class_counts_9)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_9 = class_counts_9[min(class_counts_9, key=class_counts_9.get)]
desired_majority_size_9 = minority_class_size_9 * 5

# Create the sampling strategy dictionary
sampling_strategy_9 = {0: desired_majority_size_9, 1: minority_class_size_9}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_9 = RandomUnderSampler(sampling_strategy=sampling_strategy_9, random_state=42)
X_resampled_9, y_resampled_9 = undersampler_9.fit_resample(X_train_9, y_train_9)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_9))

# Apply SMOTE on the smaller subset
X_train_resampled_9, y_train_resampled_9 = smote.fit_resample(X_resampled_9, y_resampled_9)


#Verify the class distribution after SMOTE
print("Class Distribution Before SMOTE:", Counter(y_resampled_9))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_9))


Max value in X_train_9: 2071000000.0
Min value in X_train_9: -32212234632.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [30]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_9 = X_train_resampled_9.reshape(X_train_resampled_9.shape[0], 1, 56)
X_val_9 = X_val_9.reshape(X_val_9.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_9,  # Features from CICIDS2017
    y_train_resampled_9,  # Labels from CICIDS2017
    validation_data=(X_val_9, y_val_9),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint, model_checkpoint_weight]
)

Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.59840

Epoch 1: val_accuracy did not improve from 0.59840
726/726 - 6s - 8ms/step - accuracy: 0.7220 - loss: 1.1327 - val_accuracy: 0.5775 - val_loss: 1.7688
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.59840

Epoch 2: val_accuracy did not improve from 0.59840
726/726 - 6s - 8ms/step - accuracy: 0.7208 - loss: 1.1026 - val_accuracy: 0.5769 - val_loss: 1.7540
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.59840

Epoch 3: val_accuracy did not improve from 0.59840
726/726 - 6s - 8ms/step - accuracy: 0.7215 - loss: 1.0803 - val_accuracy: 0.5771 - val_loss: 1.7478
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.59840

Epoch 4: val_accuracy did not improve from 0.59840
726/726 - 8s - 10ms/step - accuracy: 0.7214 - loss: 1.0607 - val_accuracy: 0.5757 - val_loss: 1.7468
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.59840

Epoch 5: val_accuracy did not improve from 0.59840
726/726 - 6s - 8ms/

In [7]:
# Initial split into training and test sets
X_train_10, X_test_10, y_train_10, y_test_10 = train_test_split(
    X, y, test_size=0.3, random_state=52, stratify=y
)

# Further split the training set into training and validation sets
X_train_10, X_val_10, y_train_10, y_val_10 = train_test_split(
    X_train_10, y_train_10, test_size=0.2, random_state=52, stratify=y_train_10
)

# Check for extremely large values
print("Max value in X_train_10:", np.max(X_train_10))
print("Min value in X_train_10:", np.min(X_train_10))

X_train_10_scaled = scaler.fit_transform(X_train_10)

# Get the original class distribution
class_counts_10 = Counter(y_train_10)
print("Original class distribution:", class_counts_10)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_10 = class_counts_10[min(class_counts_10, key=class_counts_10.get)]
desired_majority_size_10 = minority_class_size_10 * 5

# Create the sampling strategy dictionary
sampling_strategy_10 = {0: desired_majority_size_10, 1: minority_class_size_10}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_10 = RandomUnderSampler(sampling_strategy=sampling_strategy_10, random_state=42)
X_resampled_10, y_resampled_10 = undersampler_10.fit_resample(X_train_10, y_train_10)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_10))

# Apply SMOTE on the smaller subset
X_train_resampled_10, y_train_resampled_10 = smote.fit_resample(X_resampled_10, y_resampled_10)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_10))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_10))




Max value in X_train_10: 2071000000.0
Min value in X_train_10: -6442447920.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [8]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_10 = X_train_resampled_10.reshape(X_train_resampled_10.shape[0], 1, 56)
X_val_10 = X_val_10.reshape(X_val_10.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_10,  # Features from CICIDS2017
    y_train_resampled_10,  # Labels from CICIDS2017
    validation_data=(X_val_10, y_val_10),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint, model_checkpoint_weight]
)

Epoch 1/500

Epoch 1: val_accuracy improved from -inf to 0.58002, saving model to /kaggle/working/checkpoint_model_gru.keras

Epoch 1: val_accuracy improved from -inf to 0.58002, saving model to /kaggle/working/checkpoint_model_gru.weights.h5
726/726 - 7s - 9ms/step - accuracy: 0.7414 - loss: 0.7819 - val_accuracy: 0.5800 - val_loss: 2.2925
Epoch 2/500

Epoch 2: val_accuracy improved from 0.58002 to 0.58970, saving model to /kaggle/working/checkpoint_model_gru.keras

Epoch 2: val_accuracy improved from 0.58002 to 0.58970, saving model to /kaggle/working/checkpoint_model_gru.weights.h5
726/726 - 4s - 5ms/step - accuracy: 0.7445 - loss: 0.7663 - val_accuracy: 0.5897 - val_loss: 2.2449
Epoch 3/500

Epoch 3: val_accuracy improved from 0.58970 to 0.59505, saving model to /kaggle/working/checkpoint_model_gru.keras

Epoch 3: val_accuracy improved from 0.58970 to 0.59505, saving model to /kaggle/working/checkpoint_model_gru.weights.h5
726/726 - 3s - 5ms/step - accuracy: 0.7446 - loss: 0.7587 -

In [9]:
# Initial split into training and test sets
X_train_11, X_test_11, y_train_11, y_test_11 = train_test_split(
    X, y, test_size=0.3, random_state=53, stratify=y
)

# Further split the training set into training and validation sets
X_train_11, X_val_11, y_train_11, y_val_11 = train_test_split(
    X_train_11, y_train_11, test_size=0.2, random_state=53, stratify=y_train_11
)

# Check for extremely large values
print("Max value in X_train_11:", np.max(X_train_11))
print("Min value in X_train_11:", np.min(X_train_11))

X_train_11_scaled = scaler.fit_transform(X_train_11)

# Get the original class distribution
class_counts_11 = Counter(y_train_11)
print("Original class distribution:", class_counts_11)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_11 = class_counts_11[min(class_counts_11, key=class_counts_11.get)]
desired_majority_size_11 = minority_class_size_11 * 5

# Create the sampling strategy dictionary
sampling_strategy_11 = {0: desired_majority_size_11, 1: minority_class_size_11}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_11 = RandomUnderSampler(sampling_strategy=sampling_strategy_11, random_state=42)
X_resampled_11, y_resampled_11 = undersampler_11.fit_resample(X_train_11, y_train_11)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_11))

# Apply SMOTE on the smaller subset
X_train_resampled_11, y_train_resampled_11 = smote.fit_resample(X_resampled_11, y_resampled_11)


#Verify the class distribution after SMOTE
print("Class Distribution Before SMOTE:", Counter(y_resampled_11))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_11))

Max value in X_train_11: 2071000000.0
Min value in X_train_11: -6442447920.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [10]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_11 = X_train_resampled_11.reshape(X_train_resampled_11.shape[0], 1, 56)
X_val_11 = X_val_11.reshape(X_val_11.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_11,  # Features from CICIDS2017
    y_train_resampled_11,  # Labels from CICIDS2017
    validation_data=(X_val_11, y_val_11),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint, model_checkpoint_weight]
)

Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.60553

Epoch 1: val_accuracy did not improve from 0.60553
726/726 - 4s - 5ms/step - accuracy: 0.7340 - loss: 0.8168 - val_accuracy: 0.5837 - val_loss: 1.8277
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.60553

Epoch 2: val_accuracy did not improve from 0.60553
726/726 - 4s - 5ms/step - accuracy: 0.7431 - loss: 0.7951 - val_accuracy: 0.6020 - val_loss: 1.8398
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.60553

Epoch 3: val_accuracy did not improve from 0.60553
726/726 - 3s - 5ms/step - accuracy: 0.7450 - loss: 0.7889 - val_accuracy: 0.6022 - val_loss: 1.8341
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.60553

Epoch 4: val_accuracy did not improve from 0.60553
726/726 - 3s - 5ms/step - accuracy: 0.7415 - loss: 0.7845 - val_accuracy: 0.4631 - val_loss: 1.8271
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.60553

Epoch 5: val_accuracy did not improve from 0.60553
726/726 - 3s - 5ms/s

In [11]:
# Initial split into training and test sets
X_train_12, X_test_12, y_train_12, y_test_12 = train_test_split(
    X, y, test_size=0.3, random_state=54, stratify=y
)

# Further split the training set into training and validation sets
X_train_12, X_val_12, y_train_12, y_val_12 = train_test_split(
    X_train_12, y_train_12, test_size=0.2, random_state=54, stratify=y_train_12
)

# Check for extremely large values
print("Max value in X_train_12:", np.max(X_train_12))
print("Min value in X_train_12:", np.min(X_train_12))

X_train_12_scaled = scaler.fit_transform(X_train_12)

# Get the original class distribution
class_counts_12 = Counter(y_train_12)
print("Original class distribution:", class_counts_12)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_12 = class_counts_12[min(class_counts_12, key=class_counts_12.get)]
desired_majority_size_12 = minority_class_size_12 * 5

# Create the sampling strategy dictionary
sampling_strategy_12 = {0: desired_majority_size_12, 1: minority_class_size_12}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_12 = RandomUnderSampler(sampling_strategy=sampling_strategy_12, random_state=42)
X_resampled_12, y_resampled_12 = undersampler_12.fit_resample(X_train_12, y_train_12)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_12))

# Apply SMOTE on the smaller subset
X_train_resampled_12, y_train_resampled_12 = smote.fit_resample(X_resampled_12, y_resampled_12)


#Verify the class distribution after SMOTE
print("Class Distribution Before SMOTE:", Counter(y_resampled_12))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_12))

Max value in X_train_12: 2071000000.0
Min value in X_train_12: -32212234632.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [12]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_12 = X_train_resampled_12.reshape(X_train_resampled_12.shape[0], 1, 56)
X_val_12 = X_val_12.reshape(X_val_12.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_12,  # Features from CICIDS2017
    y_train_resampled_12,  # Labels from CICIDS2017
    validation_data=(X_val_12, y_val_12),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint, model_checkpoint_weight]
)

Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.60553

Epoch 1: val_accuracy did not improve from 0.60553
726/726 - 3s - 5ms/step - accuracy: 0.7557 - loss: 0.8154 - val_accuracy: 0.5663 - val_loss: 1.7518
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.60553

Epoch 2: val_accuracy did not improve from 0.60553
726/726 - 3s - 5ms/step - accuracy: 0.7553 - loss: 0.7974 - val_accuracy: 0.5694 - val_loss: 1.7450
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.60553

Epoch 3: val_accuracy did not improve from 0.60553
726/726 - 3s - 4ms/step - accuracy: 0.7551 - loss: 0.7905 - val_accuracy: 0.5696 - val_loss: 1.7439
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.60553

Epoch 4: val_accuracy did not improve from 0.60553
726/726 - 3s - 4ms/step - accuracy: 0.7553 - loss: 0.7865 - val_accuracy: 0.5751 - val_loss: 1.7456
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.60553

Epoch 5: val_accuracy did not improve from 0.60553
726/726 - 3s - 5ms/s

In [13]:
# Initial split into training and test sets
X_train_13, X_test_13, y_train_13, y_test_13 = train_test_split(
    X, y, test_size=0.3, random_state=55, stratify=y
)

# Further split the training set into training and validation sets
X_train_13, X_val_13, y_train_13, y_val_13 = train_test_split(
    X_train_13, y_train_13, test_size=0.2, random_state=55, stratify=y_train_13
)

# Check for extremely large values
print("Max value in X_train_13:", np.max(X_train_13))
print("Min value in X_train_13:", np.min(X_train_13))

X_train_13_scaled = scaler.fit_transform(X_train_13)

# Get the original class distribution
class_counts_13 = Counter(y_train_13)
print("Original class distribution:", class_counts_13)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_13 = class_counts_13[min(class_counts_13, key=class_counts_13.get)]
desired_majority_size_13 = minority_class_size_13 * 5

# Create the sampling strategy dictionary
sampling_strategy_13 = {0: desired_majority_size_13, 1: minority_class_size_13}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_13 = RandomUnderSampler(sampling_strategy=sampling_strategy_13, random_state=42)
X_resampled_13, y_resampled_13 = undersampler_13.fit_resample(X_train_13, y_train_13)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_13))

# Apply SMOTE on the smaller subset
X_train_resampled_13, y_train_resampled_13 = smote.fit_resample(X_resampled_13, y_resampled_13)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_13))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_13))


Max value in X_train_13: 2071000000.0
Min value in X_train_13: -32212234632.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [14]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_13 = X_train_resampled_13.reshape(X_train_resampled_13.shape[0], 1, 56)
X_val_13 = X_val_13.reshape(X_val_13.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_13,  # Features from CICIDS2017
    y_train_resampled_13,  # Labels from CICIDS2017
    validation_data=(X_val_13, y_val_13),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint, model_checkpoint_weight]
)

Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.60553

Epoch 1: val_accuracy did not improve from 0.60553
726/726 - 3s - 5ms/step - accuracy: 0.7178 - loss: 0.8477 - val_accuracy: 0.5922 - val_loss: 1.7920
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.60553

Epoch 2: val_accuracy did not improve from 0.60553
726/726 - 3s - 5ms/step - accuracy: 0.7192 - loss: 0.8410 - val_accuracy: 0.5942 - val_loss: 1.7948
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.60553

Epoch 3: val_accuracy did not improve from 0.60553
726/726 - 3s - 5ms/step - accuracy: 0.7199 - loss: 0.8379 - val_accuracy: 0.5942 - val_loss: 1.8039
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.60553

Epoch 4: val_accuracy did not improve from 0.60553
726/726 - 3s - 5ms/step - accuracy: 0.7203 - loss: 0.8353 - val_accuracy: 0.5943 - val_loss: 1.8036
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.60553

Epoch 5: val_accuracy did not improve from 0.60553
726/726 - 3s - 5ms/s

In [15]:
# Initial split into training and test sets
X_train_14, X_test_14, y_train_14, y_test_14 = train_test_split(
    X, y, test_size=0.3, random_state=56, stratify=y
)

# Further split the training set into training and validation sets
X_train_14, X_val_14, y_train_14, y_val_14 = train_test_split(
    X_train_14, y_train_14, test_size=0.2, random_state=56, stratify=y_train_14
)

# Check for extremely large values
print("Max value in X_train_14:", np.max(X_train_14))
print("Min value in X_train_14:", np.min(X_train_14))

X_train_14_scaled = scaler.fit_transform(X_train_14)

# Get the original class distribution
class_counts_14 = Counter(y_train_14)
print("Original class distribution:", class_counts_14)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_14 = class_counts_14[min(class_counts_14, key=class_counts_14.get)]
desired_majority_size_14 = minority_class_size_14 * 5

# Create the sampling strategy dictionary
sampling_strategy_14 = {0: desired_majority_size_14, 1: minority_class_size_14}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_14 = RandomUnderSampler(sampling_strategy=sampling_strategy_14, random_state=42)
X_resampled_14, y_resampled_14 = undersampler_14.fit_resample(X_train_14, y_train_14)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_14))

# Apply SMOTE on the smaller subset
X_train_resampled_14, y_train_resampled_14 = smote.fit_resample(X_resampled_14, y_resampled_14)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_14))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_14))



Max value in X_train_14: 2071000000.0
Min value in X_train_14: -6442447920.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [16]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_14 = X_train_resampled_14.reshape(X_train_resampled_14.shape[0], 1, 56)
X_val_14 = X_val_14.reshape(X_val_14.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_14,  # Features from CICIDS2017
    y_train_resampled_14,  # Labels from CICIDS2017
    validation_data=(X_val_14, y_val_14),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint, model_checkpoint_weight]
)


Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.60553

Epoch 1: val_accuracy did not improve from 0.60553
726/726 - 4s - 6ms/step - accuracy: 0.7435 - loss: 0.7513 - val_accuracy: 0.5957 - val_loss: 1.8499
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.60553

Epoch 2: val_accuracy did not improve from 0.60553
726/726 - 4s - 6ms/step - accuracy: 0.7465 - loss: 0.7436 - val_accuracy: 0.5962 - val_loss: 1.8361
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.60553

Epoch 3: val_accuracy did not improve from 0.60553
726/726 - 4s - 6ms/step - accuracy: 0.7468 - loss: 0.7402 - val_accuracy: 0.6017 - val_loss: 1.8327
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.60553

Epoch 4: val_accuracy did not improve from 0.60553
726/726 - 4s - 6ms/step - accuracy: 0.7475 - loss: 0.7375 - val_accuracy: 0.6017 - val_loss: 1.8220
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.60553

Epoch 5: val_accuracy did not improve from 0.60553
726/726 - 4s - 6ms/s

In [17]:
# Initial split into training and test sets
X_train_15, X_test_15, y_train_15, y_test_15 = train_test_split(
    X, y, test_size=0.3, random_state=57, stratify=y
)

# Further split the training set into training and validation sets
X_train_15, X_val_15, y_train_15, y_val_15 = train_test_split(
    X_train_15, y_train_15, test_size=0.2, random_state=57, stratify=y_train_15
)

# Check for extremely large values
print("Max value in X_train_15:", np.max(X_train_15))
print("Min value in X_train_15:", np.min(X_train_15))

X_train_15_scaled = scaler.fit_transform(X_train_15)

# Get the original class distribution
class_counts_15 = Counter(y_train_15)
print("Original class distribution:", class_counts_15)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_15 = class_counts_15[min(class_counts_15, key=class_counts_15.get)]
desired_majority_size_15 = minority_class_size_15 * 5

# Create the sampling strategy dictionary
sampling_strategy_15 = {0: desired_majority_size_15, 1: minority_class_size_15}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_15 = RandomUnderSampler(sampling_strategy=sampling_strategy_15, random_state=42)
X_resampled_15, y_resampled_15 = undersampler_15.fit_resample(X_train_15, y_train_15)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_15))

# Apply SMOTE on the smaller subset
X_train_resampled_15, y_train_resampled_15 = smote.fit_resample(X_resampled_15, y_resampled_15)


#Verify the class distribution after SMOTE
print("Class Distribution Before SMOTE:", Counter(y_resampled_15))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_15))


Max value in X_train_15: 2071000000.0
Min value in X_train_15: -32212234632.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [18]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_15 = X_train_resampled_15.reshape(X_train_resampled_15.shape[0], 1, 56)
X_val_15 = X_val_15.reshape(X_val_15.shape[0], 1, 56)


# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_15,  # Features from CICIDS2017
    y_train_resampled_15,  # Labels from CICIDS2017
    validation_data=(X_val_15, y_val_15),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint, model_checkpoint_weight]
)


Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.60553

Epoch 1: val_accuracy did not improve from 0.60553
726/726 - 4s - 6ms/step - accuracy: 0.7364 - loss: 0.8310 - val_accuracy: 0.5704 - val_loss: 2.0932
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.60553

Epoch 2: val_accuracy did not improve from 0.60553
726/726 - 4s - 5ms/step - accuracy: 0.7322 - loss: 0.8003 - val_accuracy: 0.5634 - val_loss: 2.1596
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.60553

Epoch 3: val_accuracy did not improve from 0.60553
726/726 - 4s - 5ms/step - accuracy: 0.7296 - loss: 0.7890 - val_accuracy: 0.5635 - val_loss: 2.1800
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.60553

Epoch 4: val_accuracy did not improve from 0.60553
726/726 - 4s - 5ms/step - accuracy: 0.7295 - loss: 0.7810 - val_accuracy: 0.5620 - val_loss: 2.1852
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.60553

Epoch 5: val_accuracy did not improve from 0.60553
726/726 - 4s - 5ms/s

In [7]:
# Initial split into training and test sets
X_train_16, X_test_16, y_train_16, y_test_16 = train_test_split(
    X, y, test_size=0.3, random_state=58, stratify=y
)

# Further split the training set into training and validation sets
X_train_16, X_val_16, y_train_16, y_val_16 = train_test_split(
    X_train_16, y_train_16, test_size=0.2, random_state=58, stratify=y_train_16
)

# Check for extremely large values
print("Max value in X_train_16:", np.max(X_train_16))
print("Min value in X_train_16:", np.min(X_train_16))

X_train_16_scaled = scaler.fit_transform(X_train_16)

# Get the original class distribution
class_counts_16 = Counter(y_train_16)
print("Original class distribution:", class_counts_16)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_16 = class_counts_16[min(class_counts_16, key=class_counts_16.get)]
desired_majority_size_16 = minority_class_size_16 * 5

# Create the sampling strategy dictionary
sampling_strategy_16 = {0: desired_majority_size_16, 1: minority_class_size_16}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_16 = RandomUnderSampler(sampling_strategy=sampling_strategy_16, random_state=42)
X_resampled_16, y_resampled_16 = undersampler_16.fit_resample(X_train_16, y_train_16)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_16))

# Apply SMOTE on the smaller subset
X_train_resampled_16, y_train_resampled_16 = smote.fit_resample(X_resampled_16, y_resampled_16)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_16))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_16))



Max value in X_train_16: 2071000000.0
Min value in X_train_16: -32212234632.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [8]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_16 = X_train_resampled_16.reshape(X_train_resampled_16.shape[0], 1, 56)
X_val_16 = X_val_16.reshape(X_val_16.shape[0], 1, 56)


# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_16,  # Features from CICIDS2017
    y_train_resampled_16,  # Labels from CICIDS2017
    validation_data=(X_val_16, y_val_16),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint, checkpoint_f1, checkpoint_roc_auc]
)


Epoch 1/500

Epoch 1: val_accuracy improved from -inf to 0.62325, saving model to /kaggle/working/checkpoint_model_gru.keras
726/726 - 8s - 12ms/step - accuracy: 0.7271 - loss: 0.8244 - val_accuracy: 0.6233 - val_loss: 1.7092
Epoch 2/500


  self._save_model(epoch=epoch, batch=None, logs=logs)
  self._save_model(epoch=epoch, batch=None, logs=logs)



Epoch 2: val_accuracy improved from 0.62325 to 0.62332, saving model to /kaggle/working/checkpoint_model_gru.keras
726/726 - 5s - 7ms/step - accuracy: 0.7352 - loss: 0.8069 - val_accuracy: 0.6233 - val_loss: 1.7322
Epoch 3/500

Epoch 3: val_accuracy improved from 0.62332 to 0.62445, saving model to /kaggle/working/checkpoint_model_gru.keras
726/726 - 5s - 7ms/step - accuracy: 0.7359 - loss: 0.8011 - val_accuracy: 0.6245 - val_loss: 1.7456
Epoch 4/500

Epoch 4: val_accuracy improved from 0.62445 to 0.62457, saving model to /kaggle/working/checkpoint_model_gru.keras
726/726 - 5s - 7ms/step - accuracy: 0.7357 - loss: 0.7969 - val_accuracy: 0.6246 - val_loss: 1.7516
Epoch 5/500

Epoch 5: val_accuracy improved from 0.62457 to 0.62698, saving model to /kaggle/working/checkpoint_model_gru.keras
726/726 - 6s - 8ms/step - accuracy: 0.7362 - loss: 0.7935 - val_accuracy: 0.6270 - val_loss: 1.7570
Epoch 6/500

Epoch 6: val_accuracy improved from 0.62698 to 0.62699, saving model to /kaggle/working

In [7]:
# Initial split into training and test sets
X_train_17, X_test_17, y_train_17, y_test_17 = train_test_split(
    X, y, test_size=0.3, random_state=59, stratify=y
)

# Further split the training set into training and validation sets
X_train_17, X_val_17, y_train_17, y_val_17 = train_test_split(
    X_train_17, y_train_17, test_size=0.2, random_state=59, stratify=y_train_17
)

# Check for extremely large values
print("Max value in X_train_17:", np.max(X_train_17))
print("Min value in X_train_17:", np.min(X_train_17))

X_train_17_scaled = scaler.fit_transform(X_train_17)

# Get the original class distribution
class_counts_17 = Counter(y_train_17)
print("Original class distribution:", class_counts_17)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_17 = class_counts_17[min(class_counts_17, key=class_counts_17.get)]
desired_majority_size_17 = minority_class_size_17 * 5

# Create the sampling strategy dictionary
sampling_strategy_17 = {0: desired_majority_size_17, 1: minority_class_size_17}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_17 = RandomUnderSampler(sampling_strategy=sampling_strategy_17, random_state=42)
X_resampled_17, y_resampled_17 = undersampler_17.fit_resample(X_train_17, y_train_17)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_17))

# Apply SMOTE on the smaller subset
X_train_resampled_17, y_train_resampled_17 = smote.fit_resample(X_resampled_17, y_resampled_17)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_17))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_17))



Max value in X_train_17: 2071000000.0
Min value in X_train_17: -32212234632.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [8]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_17 = X_train_resampled_17.reshape(X_train_resampled_17.shape[0], 1, 56)
X_val_17 = X_val_17.reshape(X_val_17.shape[0], 1, 56)


# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_17,  # Features from CICIDS2017
    y_train_resampled_17,  # Labels from CICIDS2017
    validation_data=(X_val_17, y_val_17),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint]
)


Epoch 1/500

Epoch 1: val_accuracy improved from -inf to 0.59895, saving model to /kaggle/working/checkpoint_model_gru.keras
726/726 - 8s - 11ms/step - accuracy: 0.7286 - loss: 0.8371 - val_accuracy: 0.5989 - val_loss: 1.9065
Epoch 2/500

Epoch 2: val_accuracy improved from 0.59895 to 0.60361, saving model to /kaggle/working/checkpoint_model_gru.keras
726/726 - 5s - 7ms/step - accuracy: 0.7300 - loss: 0.8275 - val_accuracy: 0.6036 - val_loss: 1.9097
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.60361
726/726 - 5s - 6ms/step - accuracy: 0.7238 - loss: 0.8221 - val_accuracy: 0.4673 - val_loss: 1.9068
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.60361
726/726 - 5s - 6ms/step - accuracy: 0.7221 - loss: 0.8180 - val_accuracy: 0.4595 - val_loss: 1.9138
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.60361
726/726 - 4s - 6ms/step - accuracy: 0.7215 - loss: 0.8145 - val_accuracy: 0.4594 - val_loss: 1.9153
Epoch 6/500

Epoch 6: val_accuracy did not improve fro

In [9]:
# Initial split into training and test sets
X_train_18, X_test_18, y_train_18, y_test_18 = train_test_split(
    X, y, test_size=0.3, random_state=60, stratify=y
)

# Further split the training set into training and validation sets
X_train_18, X_val_18, y_train_18, y_val_18 = train_test_split(
    X_train_18, y_train_18, test_size=0.2, random_state=60, stratify=y_train_18
)

# Check for extremely large values
print("Max value in X_train_18:", np.max(X_train_18))
print("Min value in X_train_18:", np.min(X_train_18))

X_train_18_scaled = scaler.fit_transform(X_train_18)

# Get the original class distribution
class_counts_18 = Counter(y_train_18)
print("Original class distribution:", class_counts_18)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_18 = class_counts_18[min(class_counts_18, key=class_counts_18.get)]
desired_majority_size_18 = minority_class_size_18 * 5

# Create the sampling strategy dictionary
sampling_strategy_18 = {0: desired_majority_size_18, 1: minority_class_size_18}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_18 = RandomUnderSampler(sampling_strategy=sampling_strategy_18, random_state=42)
X_resampled_18, y_resampled_18 = undersampler_18.fit_resample(X_train_18, y_train_18)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_18))

# Apply SMOTE on the smaller subset
X_train_resampled_18, y_train_resampled_18 = smote.fit_resample(X_resampled_18, y_resampled_18)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_18))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_18))




Max value in X_train_18: 2071000000.0
Min value in X_train_18: -32212234632.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [10]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_18 = X_train_resampled_18.reshape(X_train_resampled_18.shape[0], 1, 56)
X_val_18 = X_val_18.reshape(X_val_18.shape[0], 1, 56)


# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_18,  # Features from CICIDS2017
    y_train_resampled_18,  # Labels from CICIDS2017
    validation_data=(X_val_18, y_val_18),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint]
) 

Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.60361
726/726 - 5s - 7ms/step - accuracy: 0.7336 - loss: 0.7546 - val_accuracy: 0.5606 - val_loss: 1.9521
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.60361
726/726 - 5s - 6ms/step - accuracy: 0.7344 - loss: 0.7468 - val_accuracy: 0.5614 - val_loss: 1.9364
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.60361
726/726 - 5s - 6ms/step - accuracy: 0.7366 - loss: 0.7427 - val_accuracy: 0.5613 - val_loss: 1.9329
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.60361
726/726 - 5s - 7ms/step - accuracy: 0.7436 - loss: 0.7395 - val_accuracy: 0.5616 - val_loss: 1.9289
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.60361
726/726 - 5s - 6ms/step - accuracy: 0.7439 - loss: 0.7368 - val_accuracy: 0.5618 - val_loss: 1.9211
Epoch 6/500

Epoch 6: val_accuracy did not improve from 0.60361
726/726 - 5s - 7ms/step - accuracy: 0.7444 - loss: 0.7345 - val_accuracy: 0.5616 - val_loss: 1.9111
Epoch 7/500

Epo

In [8]:
# Initial split into training and test sets
X_train_19, X_test_19, y_train_19, y_test_19 = train_test_split(
    X, y, test_size=0.3, random_state=61, stratify=y
)

# Further split the training set into training and validation sets
X_train_19, X_val_19, y_train_19, y_val_19 = train_test_split(
    X_train_19, y_train_19, test_size=0.2, random_state=61, stratify=y_train_19
)

# Check for extremely large values
print("Max value in X_train_19:", np.max(X_train_19))
print("Min value in X_train_19:", np.min(X_train_19))

X_train_19_scaled = scaler.fit_transform(X_train_19)

# Get the original class distribution
class_counts_19 = Counter(y_train_19)
print("Original class distribution:", class_counts_19)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_19 = class_counts_19[min(class_counts_19, key=class_counts_19.get)]
desired_majority_size_19 = minority_class_size_19 * 5

# Create the sampling strategy dictionary
sampling_strategy_19 = {0: desired_majority_size_19, 1: minority_class_size_19}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_19 = RandomUnderSampler(sampling_strategy=sampling_strategy_19, random_state=42)
X_resampled_19, y_resampled_19 = undersampler_19.fit_resample(X_train_19, y_train_19)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_19))

# Apply SMOTE on the smaller subset
X_train_resampled_19, y_train_resampled_19 = smote.fit_resample(X_resampled_19, y_resampled_19)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_19))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_19))



Max value in X_train_19: 2071000000.0
Min value in X_train_19: -6442447920.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [None]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_19 = X_train_resampled_19.reshape(X_train_resampled_19.shape[0], 1, 56)
X_val_19 = X_val_19.reshape(X_val_19.shape[0], 1, 56)


# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_19,  # Features from CICIDS2017
    y_train_resampled_19,  # Labels from CICIDS2017
    validation_data=(X_val_19, y_val_19),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint]
)

Epoch 1/500

Epoch 1: val_accuracy improved from -inf to 0.45160, saving model to /kaggle/working/checkpoint_model_gru.keras
726/726 - 8s - 11ms/step - accuracy: 0.7201 - loss: 0.9032 - val_accuracy: 0.4516 - val_loss: 1.9103
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.45160
726/726 - 5s - 6ms/step - accuracy: 0.7174 - loss: 0.8921 - val_accuracy: 0.4514 - val_loss: 1.9051
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.45160
726/726 - 5s - 6ms/step - accuracy: 0.7138 - loss: 0.8866 - val_accuracy: 0.4510 - val_loss: 1.8977
Epoch 4/500

Epoch 4: val_accuracy improved from 0.45160 to 0.45327, saving model to /kaggle/working/checkpoint_model_gru.keras
726/726 - 5s - 7ms/step - accuracy: 0.7125 - loss: 0.8822 - val_accuracy: 0.4533 - val_loss: 1.8866
Epoch 5/500

Epoch 5: val_accuracy improved from 0.45327 to 0.45341, saving model to /kaggle/working/checkpoint_model_gru.keras
726/726 - 5s - 7ms/step - accuracy: 0.7127 - loss: 0.8784 - val_accuracy: 0.4534 - val_los

In [10]:
# Initial split into training and test sets
X_train_20, X_test_20, y_train_20, y_test_20 = train_test_split(
    X, y, test_size=0.3, random_state=62, stratify=y
)

# Further split the training set into training and validation sets
X_train_20, X_val_20, y_train_20, y_val_20 = train_test_split(
    X_train_20, y_train_20, test_size=0.2, random_state=62, stratify=y_train_20
)

# Check for extremely large values
print("Max value in X_train_20:", np.max(X_train_20))
print("Min value in X_train_20:", np.min(X_train_20))

X_train_20_scaled = scaler.fit_transform(X_train_20)

# Get the original class distribution
class_counts_20 = Counter(y_train_20)
print("Original class distribution:", class_counts_20)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_20 = class_counts_20[min(class_counts_20, key=class_counts_20.get)]
desired_majority_size_20 = minority_class_size_20 * 5

# Create the sampling strategy dictionary
sampling_strategy_20 = {0: desired_majority_size_20, 1: minority_class_size_20}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_20 = RandomUnderSampler(sampling_strategy=sampling_strategy_20, random_state=42)
X_resampled_20, y_resampled_20 = undersampler_20.fit_resample(X_train_20, y_train_20)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_20))

# Apply SMOTE on the smaller subset
X_train_resampled_20, y_train_resampled_20 = smote.fit_resample(X_resampled_20, y_resampled_20)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_20))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_20))



Max value in X_train_20: 2071000000.0
Min value in X_train_20: -32212234632.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [11]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_20 = X_train_resampled_20.reshape(X_train_resampled_20.shape[0], 1, 56)
X_val_20 = X_val_20.reshape(X_val_20.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_20,  # Features from CICIDS2017
    y_train_resampled_20,  # Labels from CICIDS2017
    validation_data=(X_val_20, y_val_20),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint]
)


Epoch 1/500

Epoch 1: val_accuracy improved from -inf to 0.57734, saving model to /kaggle/working/checkpoint_model_gru.keras
726/726 - 8s - 10ms/step - accuracy: 0.7335 - loss: 1.0117 - val_accuracy: 0.5773 - val_loss: 1.9917
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.57734
726/726 - 4s - 6ms/step - accuracy: 0.7349 - loss: 0.9932 - val_accuracy: 0.5764 - val_loss: 1.9959
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.57734
726/726 - 4s - 5ms/step - accuracy: 0.7356 - loss: 0.9803 - val_accuracy: 0.5760 - val_loss: 1.9972
Epoch 4/500

Epoch 4: val_accuracy improved from 0.57734 to 0.57773, saving model to /kaggle/working/checkpoint_model_gru.keras
726/726 - 4s - 6ms/step - accuracy: 0.7382 - loss: 0.9693 - val_accuracy: 0.5777 - val_loss: 1.9932
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.57773
726/726 - 4s - 6ms/step - accuracy: 0.7386 - loss: 0.9594 - val_accuracy: 0.5766 - val_loss: 2.0011
Epoch 6/500

Epoch 6: val_accuracy improved from 0.577

In [12]:
print("X_test original shape:", X_test.shape)

X_test = X_test.reshape(X_test.shape[0], 1, 56)  # Ensure 3D

print("X_test reshaped shape:", X_test.shape)




X_test original shape: (756240, 56)
X_test reshaped shape: (756240, 1, 56)


In [13]:

# Evaluate the model
test_loss, test_accuracy = fine_tuned_model.evaluate(X_test, y_test, verbose=2)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")


23633/23633 - 27s - 1ms/step - accuracy: 0.5425 - loss: 2.1382
Test Loss: 2.138221025466919
Test Accuracy: 0.542547345161438


In [14]:
# Initial split into training and test sets
X_train_21, X_test_21, y_train_21, y_test_21 = train_test_split(
    X, y, test_size=0.3, random_state=63, stratify=y
)

# Further split the training set into training and validation sets
X_train_21, X_val_21, y_train_21, y_val_21 = train_test_split(
    X_train_21, y_train_21, test_size=0.2, random_state=63, stratify=y_train_21
)

# Check for extremely large values
print("Max value in X_train_21:", np.max(X_train_21))
print("Min value in X_train_21:", np.min(X_train_21))

X_train_21_scaled = scaler.fit_transform(X_train_21)

# Get the original class distribution
class_counts_21 = Counter(y_train_21)
print("Original class distribution:", class_counts_21)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_21 = class_counts_21[min(class_counts_21, key=class_counts_21.get)]
desired_majority_size_21 = minority_class_size_21 * 5

# Create the sampling strategy dictionary
sampling_strategy_21 = {0: desired_majority_size_21, 1: minority_class_size_21}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_21 = RandomUnderSampler(sampling_strategy=sampling_strategy_21, random_state=42)
X_resampled_21, y_resampled_21 = undersampler_21.fit_resample(X_train_21, y_train_21)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_21))

# Apply SMOTE on the smaller subset
X_train_resampled_21, y_train_resampled_21 = smote.fit_resample(X_resampled_21, y_resampled_21)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_21))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_21))



Max value in X_train_21: 2071000000.0
Min value in X_train_21: -32212234632.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [None]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_21 = X_train_resampled_21.reshape(X_train_resampled_21.shape[0], 1, 56)
X_val_21 = X_val_21.reshape(X_val_21.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_21,  # Features from CICIDS2017
    y_train_resampled_21,  # Labels from CICIDS2017
    validation_data=(X_val_21, y_val_21),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint]
)


Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.59686
726/726 - 5s - 7ms/step - accuracy: 0.7298 - loss: 0.8538 - val_accuracy: 0.5567 - val_loss: 1.9697
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.59686
726/726 - 4s - 6ms/step - accuracy: 0.7312 - loss: 0.8445 - val_accuracy: 0.5566 - val_loss: 1.9494
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.59686
726/726 - 5s - 6ms/step - accuracy: 0.7316 - loss: 0.8388 - val_accuracy: 0.5574 - val_loss: 1.9297
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.59686
726/726 - 4s - 5ms/step - accuracy: 0.7315 - loss: 0.8342 - val_accuracy: 0.5611 - val_loss: 1.9072
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.59686
726/726 - 4s - 6ms/step - accuracy: 0.7315 - loss: 0.8304 - val_accuracy: 0.5616 - val_loss: 1.8880
Epoch 6/500

Epoch 6: val_accuracy did not improve from 0.59686
726/726 - 4s - 6ms/step - accuracy: 0.7314 - loss: 0.8271 - val_accuracy: 0.5589 - val_loss: 1.8814
Epoch 7/500

Epo

In [7]:
# Initial split into training and test sets
X_train_22, X_test_22, y_train_22, y_test_22 = train_test_split(
    X, y, test_size=0.3, random_state=64, stratify=y
)

# Further split the training set into training and validation sets
X_train_22, X_val_22, y_train_22, y_val_22 = train_test_split(
    X_train_22, y_train_22, test_size=0.2, random_state=64, stratify=y_train_22
)

# Check for extremely large values
print("Max value in X_train_22:", np.max(X_train_22))
print("Min value in X_train_22:", np.min(X_train_22))

X_train_22_scaled = scaler.fit_transform(X_train_22)

# Get the original class distribution
class_counts_22 = Counter(y_train_22)
print("Original class distribution:", class_counts_22)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_22 = class_counts_22[min(class_counts_22, key=class_counts_22.get)]
desired_majority_size_22 = minority_class_size_22 * 5

# Create the sampling strategy dictionary
sampling_strategy_22 = {0: desired_majority_size_22, 1: minority_class_size_22}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_22 = RandomUnderSampler(sampling_strategy=sampling_strategy_22, random_state=42)
X_resampled_22, y_resampled_22 = undersampler_22.fit_resample(X_train_22, y_train_22)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_22))

# Apply SMOTE on the smaller subset
X_train_resampled_22, y_train_resampled_22 = smote.fit_resample(X_resampled_22, y_resampled_22)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_22))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_22))



Max value in X_train_22: 2071000000.0
Min value in X_train_22: -6442447920.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [8]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_22 = X_train_resampled_22.reshape(X_train_resampled_22.shape[0], 1, 56)
X_val_22 = X_val_22.reshape(X_val_22.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_22,  # Features from CICIDS2017
    y_train_resampled_22,  # Labels from CICIDS2017
    validation_data=(X_val_22, y_val_22),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint]
)


Epoch 1/500

Epoch 1: val_accuracy improved from -inf to 0.59777, saving model to /kaggle/working/checkpoint_model_gru.keras
726/726 - 7s - 10ms/step - accuracy: 0.7275 - loss: 0.8276 - val_accuracy: 0.5978 - val_loss: 1.9563
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7292 - loss: 0.8192 - val_accuracy: 0.5912 - val_loss: 1.9489
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7297 - loss: 0.8143 - val_accuracy: 0.5943 - val_loss: 1.9441
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7302 - loss: 0.8101 - val_accuracy: 0.5939 - val_loss: 1.9455
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7303 - loss: 0.8063 - val_accuracy: 0.5911 - val_loss: 1.9457
Epoch 6/500

Epoch 6: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7304 - loss: 0.8

In [9]:
# Initial split into training and test sets
X_train_23, X_test_23, y_train_23, y_test_23 = train_test_split(
    X, y, test_size=0.3, random_state=65, stratify=y
)

# Further split the training set into training and validation sets
X_train_23, X_val_23, y_train_23, y_val_23 = train_test_split(
    X_train_23, y_train_23, test_size=0.2, random_state=65, stratify=y_train_23
)

# Check for extremely large values
print("Max value in X_train_23:", np.max(X_train_23))
print("Min value in X_train_23:", np.min(X_train_23))

X_train_23_scaled = scaler.fit_transform(X_train_23)

# Get the original class distribution
class_counts_23 = Counter(y_train_23)
print("Original class distribution:", class_counts_23)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_23 = class_counts_23[min(class_counts_23, key=class_counts_23.get)]
desired_majority_size_23 = minority_class_size_23 * 5

# Create the sampling strategy dictionary
sampling_strategy_23 = {0: desired_majority_size_23, 1: minority_class_size_23}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_23 = RandomUnderSampler(sampling_strategy=sampling_strategy_23, random_state=42)
X_resampled_23, y_resampled_23 = undersampler_23.fit_resample(X_train_23, y_train_23)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_23))

# Apply SMOTE on the smaller subset
X_train_resampled_23, y_train_resampled_23 = smote.fit_resample(X_resampled_23, y_resampled_23)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_23))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_23))



Max value in X_train_23: 2071000000.0
Min value in X_train_23: -32212234632.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [10]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_23 = X_train_resampled_23.reshape(X_train_resampled_23.shape[0], 1, 56)
X_val_23 = X_val_23.reshape(X_val_23.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_23,  # Features from CICIDS2017
    y_train_resampled_23,  # Labels from CICIDS2017
    validation_data=(X_val_23, y_val_23),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint]
)


Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7371 - loss: 0.8485 - val_accuracy: 0.5561 - val_loss: 2.0162
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7387 - loss: 0.8322 - val_accuracy: 0.5608 - val_loss: 2.0201
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7380 - loss: 0.8259 - val_accuracy: 0.5609 - val_loss: 2.0161
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7376 - loss: 0.8215 - val_accuracy: 0.5565 - val_loss: 2.0107
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7374 - loss: 0.8178 - val_accuracy: 0.5570 - val_loss: 2.0070
Epoch 6/500

Epoch 6: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7383 - loss: 0.8145 - val_accuracy: 0.5570 - val_loss: 2.0036
Epoch 7/500

Epo

In [11]:
# Initial split into training and test sets
X_train_24, X_test_24, y_train_24, y_test_24 = train_test_split(
    X, y, test_size=0.3, random_state=66, stratify=y
)

# Further split the training set into training and validation sets
X_train_24, X_val_24, y_train_24, y_val_24 = train_test_split(
    X_train_24, y_train_24, test_size=0.2, random_state=66, stratify=y_train_24
)

# Check for extremely large values
print("Max value in X_train_24:", np.max(X_train_24))
print("Min value in X_train_24:", np.min(X_train_24))

X_train_24_scaled = scaler.fit_transform(X_train_24)

# Get the original class distribution
class_counts_24 = Counter(y_train_24)
print("Original class distribution:", class_counts_24)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_24 = class_counts_24[min(class_counts_24, key=class_counts_24.get)]
desired_majority_size_24 = minority_class_size_24 * 5

# Create the sampling strategy dictionary
sampling_strategy_24 = {0: desired_majority_size_24, 1: minority_class_size_24}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_24 = RandomUnderSampler(sampling_strategy=sampling_strategy_24, random_state=42)
X_resampled_24, y_resampled_24 = undersampler_24.fit_resample(X_train_24, y_train_24)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_24))

# Apply SMOTE on the smaller subset
X_train_resampled_24, y_train_resampled_24 = smote.fit_resample(X_resampled_24, y_resampled_24)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_24))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_24))



Max value in X_train_24: 2071000000.0
Min value in X_train_24: -32212234632.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [12]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_24 = X_train_resampled_24.reshape(X_train_resampled_24.shape[0], 1, 56)
X_val_24 = X_val_24.reshape(X_val_24.shape[0], 1, 56)


# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_24,  # Features from CICIDS2017
    y_train_resampled_24,  # Labels from CICIDS2017
    validation_data=(X_val_24, y_val_24),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint]
)


Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7610 - loss: 0.6974 - val_accuracy: 0.5759 - val_loss: 1.7693
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7604 - loss: 0.6911 - val_accuracy: 0.5765 - val_loss: 1.7594
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7589 - loss: 0.6880 - val_accuracy: 0.5760 - val_loss: 1.7661
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7591 - loss: 0.6856 - val_accuracy: 0.5760 - val_loss: 1.7647
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7587 - loss: 0.6834 - val_accuracy: 0.5761 - val_loss: 1.7735
Epoch 6/500

Epoch 6: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7588 - loss: 0.6816 - val_accuracy: 0.5757 - val_loss: 1.7756
Epoch 7/500

Epo

In [13]:
# Initial split into training and test sets
X_train_25, X_test_25, y_train_25, y_test_25 = train_test_split(
    X, y, test_size=0.3, random_state=67, stratify=y
)

# Further split the training set into training and validation sets
X_train_25, X_val_25, y_train_25, y_val_25 = train_test_split(
    X_train_25, y_train_25, test_size=0.2, random_state=67, stratify=y_train_25
)

# Check for extremely large values
print("Max value in X_train_25:", np.max(X_train_25))
print("Min value in X_train_25:", np.min(X_train_25))

X_train_25_scaled = scaler.fit_transform(X_train_25)

# Get the original class distribution
class_counts_25 = Counter(y_train_25)
print("Original class distribution:", class_counts_25)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_25 = class_counts_25[min(class_counts_25, key=class_counts_25.get)]
desired_majority_size_25 = minority_class_size_25 * 5

# Create the sampling strategy dictionary
sampling_strategy_25 = {0: desired_majority_size_25, 1: minority_class_size_25}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_25 = RandomUnderSampler(sampling_strategy=sampling_strategy_25, random_state=42)
X_resampled_25, y_resampled_25 = undersampler_25.fit_resample(X_train_25, y_train_25)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_25))

# Apply SMOTE on the smaller subset
X_train_resampled_25, y_train_resampled_25 = smote.fit_resample(X_resampled_25, y_resampled_25)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_25))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_25))



Max value in X_train_25: 2071000000.0
Min value in X_train_25: -32212234632.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [14]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_25 = X_train_resampled_25.reshape(X_train_resampled_25.shape[0], 1, 56)
X_val_25 = X_val_25.reshape(X_val_25.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_25,  # Features from CICIDS2017
    y_train_resampled_25,  # Labels from CICIDS2017
    validation_data=(X_val_25, y_val_25),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint]
)


Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.59777
726/726 - 5s - 6ms/step - accuracy: 0.7097 - loss: 1.1527 - val_accuracy: 0.5640 - val_loss: 1.9433
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7215 - loss: 1.1118 - val_accuracy: 0.5630 - val_loss: 1.9433
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7213 - loss: 1.0890 - val_accuracy: 0.5623 - val_loss: 1.9468
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7220 - loss: 1.0721 - val_accuracy: 0.5730 - val_loss: 1.9524
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7229 - loss: 1.0579 - val_accuracy: 0.5706 - val_loss: 1.9514
Epoch 6/500

Epoch 6: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7264 - loss: 1.0452 - val_accuracy: 0.5683 - val_loss: 1.9570
Epoch 7/500

Epo

In [15]:
# Initial split into training and test sets
X_train_26, X_test_26, y_train_26, y_test_26 = train_test_split(
    X, y, test_size=0.3, random_state=68, stratify=y
)

# Further split the training set into training and validation sets
X_train_26, X_val_26, y_train_26, y_val_26 = train_test_split(
    X_train_26, y_train_26, test_size=0.2, random_state=68, stratify=y_train_26
)

# Check for extremely large values
print("Max value in X_train_26:", np.max(X_train_26))
print("Min value in X_train_26:", np.min(X_train_26))

X_train_26_scaled = scaler.fit_transform(X_train_26)

# Get the original class distribution
class_counts_26 = Counter(y_train_26)
print("Original class distribution:", class_counts_26)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_26 = class_counts_26[min(class_counts_26, key=class_counts_26.get)]
desired_majority_size_26 = minority_class_size_26 * 5

# Create the sampling strategy dictionary
sampling_strategy_26 = {0: desired_majority_size_26, 1: minority_class_size_26}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_26 = RandomUnderSampler(sampling_strategy=sampling_strategy_26, random_state=42)
X_resampled_26, y_resampled_26 = undersampler_26.fit_resample(X_train_26, y_train_26)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_26))

# Apply SMOTE on the smaller subset
X_train_resampled_26, y_train_resampled_26 = smote.fit_resample(X_resampled_26, y_resampled_26)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_26))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_26))



Max value in X_train_26: 2071000000.0
Min value in X_train_26: -32212234632.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [16]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_26 = X_train_resampled_26.reshape(X_train_resampled_26.shape[0], 1, 56)
X_val_26 = X_val_26.reshape(X_val_26.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_26,  # Features from CICIDS2017
    y_train_resampled_26,  # Labels from CICIDS2017
    validation_data=(X_val_26, y_val_26),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint]
)


Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.59777
726/726 - 5s - 6ms/step - accuracy: 0.7417 - loss: 0.7812 - val_accuracy: 0.5828 - val_loss: 1.6687
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7412 - loss: 0.7725 - val_accuracy: 0.5881 - val_loss: 1.6570
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7413 - loss: 0.7680 - val_accuracy: 0.5838 - val_loss: 1.6498
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7399 - loss: 0.7644 - val_accuracy: 0.5838 - val_loss: 1.6530
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7396 - loss: 0.7614 - val_accuracy: 0.5838 - val_loss: 1.6570
Epoch 6/500

Epoch 6: val_accuracy did not improve from 0.59777
726/726 - 4s - 6ms/step - accuracy: 0.7418 - loss: 0.7587 - val_accuracy: 0.5889 - val_loss: 1.6543
Epoch 7/500

Epo

In [17]:
# Initial split into training and test sets
X_train_27, X_test_27, y_train_27, y_test_27 = train_test_split(
    X, y, test_size=0.3, random_state=69, stratify=y
)

# Further split the training set into training and validation sets
X_train_27, X_val_27, y_train_27, y_val_27 = train_test_split(
    X_train_27, y_train_27, test_size=0.2, random_state=69, stratify=y_train_27
)

# Check for extremely large values
print("Max value in X_train_27:", np.max(X_train_27))
print("Min value in X_train_27:", np.min(X_train_27))

X_train_27_scaled = scaler.fit_transform(X_train_27)

# Get the original class distribution
class_counts_27 = Counter(y_train_27)
print("Original class distribution:", class_counts_27)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_27 = class_counts_27[min(class_counts_27, key=class_counts_27.get)]
desired_majority_size_27 = minority_class_size_27 * 5

# Create the sampling strategy dictionary
sampling_strategy_27 = {0: desired_majority_size_27, 1: minority_class_size_27}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_27 = RandomUnderSampler(sampling_strategy=sampling_strategy_27, random_state=42)
X_resampled_27, y_resampled_27 = undersampler_27.fit_resample(X_train_27, y_train_27)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_27))

# Apply SMOTE on the smaller subset
X_train_resampled_27, y_train_resampled_27 = smote.fit_resample(X_resampled_27, y_resampled_27)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_27))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_27))




Max value in X_train_27: 2071000000.0
Min value in X_train_27: -32212234632.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [18]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_27 = X_train_resampled_27.reshape(X_train_resampled_27.shape[0], 1, 56)
X_val_27 = X_val_27.reshape(X_val_27.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_27,  # Features from CICIDS2017
    y_train_resampled_27,  # Labels from CICIDS2017
    validation_data=(X_val_27, y_val_27),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint]
)

Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7133 - loss: 1.0301 - val_accuracy: 0.5830 - val_loss: 1.8492
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7068 - loss: 0.9993 - val_accuracy: 0.4432 - val_loss: 1.8625
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7004 - loss: 0.9798 - val_accuracy: 0.4466 - val_loss: 1.8681
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7053 - loss: 0.9640 - val_accuracy: 0.4473 - val_loss: 1.8835
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7070 - loss: 0.9507 - val_accuracy: 0.4478 - val_loss: 1.8905
Epoch 6/500

Epoch 6: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7052 - loss: 0.9391 - val_accuracy: 0.4420 - val_loss: 1.9000
Epoch 7/500

Epo

In [19]:
# Initial split into training and test sets
X_train_28, X_test_28, y_train_28, y_test_28 = train_test_split(
    X, y, test_size=0.3, random_state=70, stratify=y
)

# Further split the training set into training and validation sets
X_train_28, X_val_28, y_train_28, y_val_28 = train_test_split(
    X_train_28, y_train_28, test_size=0.2, random_state=70, stratify=y_train_28
)

# Check for extremely large values
print("Max value in X_train_28:", np.max(X_train_28))
print("Min value in X_train_28:", np.min(X_train_28))

X_train_28_scaled = scaler.fit_transform(X_train_28)

# Get the original class distribution
class_counts_28 = Counter(y_train_28)
print("Original class distribution:", class_counts_28)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_28 = class_counts_28[min(class_counts_28, key=class_counts_28.get)]
desired_majority_size_28 = minority_class_size_28 * 5

# Create the sampling strategy dictionary
sampling_strategy_28 = {0: desired_majority_size_28, 1: minority_class_size_28}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_28 = RandomUnderSampler(sampling_strategy=sampling_strategy_28, random_state=42)
X_resampled_28, y_resampled_28 = undersampler_28.fit_resample(X_train_28, y_train_28)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_28))

# Apply SMOTE on the smaller subset
X_train_resampled_28, y_train_resampled_28 = smote.fit_resample(X_resampled_28, y_resampled_28)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_28))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_28))



Max value in X_train_28: 2071000000.0
Min value in X_train_28: -6442447920.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [20]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_28 = X_train_resampled_28.reshape(X_train_resampled_28.shape[0], 1, 56)
X_val_28 = X_val_28.reshape(X_val_28.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_28,  # Features from CICIDS2017
    y_train_resampled_28,  # Labels from CICIDS2017
    validation_data=(X_val_28, y_val_28),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint]
)


Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.60070
726/726 - 5s - 7ms/step - accuracy: 0.7266 - loss: 0.8135 - val_accuracy: 0.5681 - val_loss: 2.0636
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7240 - loss: 0.8073 - val_accuracy: 0.5723 - val_loss: 2.0225
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7238 - loss: 0.8037 - val_accuracy: 0.5731 - val_loss: 1.9924
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7239 - loss: 0.8007 - val_accuracy: 0.5744 - val_loss: 1.9618
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7235 - loss: 0.7981 - val_accuracy: 0.5709 - val_loss: 1.9420
Epoch 6/500

Epoch 6: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7248 - loss: 0.7958 - val_accuracy: 0.5709 - val_loss: 1.9140
Epoch 7/500

Epo

In [21]:
# Initial split into training and test sets
X_train_29, X_test_29, y_train_29, y_test_29 = train_test_split(
    X, y, test_size=0.3, random_state=71, stratify=y
)

# Further split the training set into training and validation sets
X_train_29, X_val_29, y_train_29, y_val_29 = train_test_split(
    X_train_29, y_train_29, test_size=0.2, random_state=71, stratify=y_train_29
)

# Check for extremely large values
print("Max value in X_train_29:", np.max(X_train_29))
print("Min value in X_train_29:", np.min(X_train_29))

X_train_29_scaled = scaler.fit_transform(X_train_29)

# Get the original class distribution
class_counts_29 = Counter(y_train_29)
print("Original class distribution:", class_counts_29)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_29 = class_counts_29[min(class_counts_29, key=class_counts_29.get)]
desired_majority_size_29 = minority_class_size_29 * 5

# Create the sampling strategy dictionary
sampling_strategy_29 = {0: desired_majority_size_29, 1: minority_class_size_29}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_29 = RandomUnderSampler(sampling_strategy=sampling_strategy_29, random_state=42)
X_resampled_29, y_resampled_29 = undersampler_29.fit_resample(X_train_29, y_train_29)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_29))

# Apply SMOTE on the smaller subset
X_train_resampled_29, y_train_resampled_29 = smote.fit_resample(X_resampled_29, y_resampled_29)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_29))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_29))




Max value in X_train_29: 2071000000.0
Min value in X_train_29: -9663668122.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [22]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_29 = X_train_resampled_29.reshape(X_train_resampled_29.shape[0], 1, 56)
X_val_29 = X_val_29.reshape(X_val_29.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_29,  # Features from CICIDS2017
    y_train_resampled_29,  # Labels from CICIDS2017
    validation_data=(X_val_29, y_val_29),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint]
)

Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7330 - loss: 0.8258 - val_accuracy: 0.5985 - val_loss: 1.6843
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7332 - loss: 0.8126 - val_accuracy: 0.5943 - val_loss: 1.6838
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.60070
726/726 - 4s - 5ms/step - accuracy: 0.7334 - loss: 0.8072 - val_accuracy: 0.5943 - val_loss: 1.6803
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7381 - loss: 0.8031 - val_accuracy: 0.5914 - val_loss: 1.6803
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7388 - loss: 0.7998 - val_accuracy: 0.5916 - val_loss: 1.6802
Epoch 6/500

Epoch 6: val_accuracy did not improve from 0.60070
726/726 - 4s - 5ms/step - accuracy: 0.7391 - loss: 0.7969 - val_accuracy: 0.5891 - val_loss: 1.6803
Epoch 7/500

Epo

In [23]:
# Initial split into training and test sets
X_train_30, X_test_30, y_train_30, y_test_30 = train_test_split(
    X, y, test_size=0.3, random_state=72, stratify=y
)

# Further split the training set into training and validation sets
X_train_30, X_val_30, y_train_30, y_val_30 = train_test_split(
    X_train_30, y_train_30, test_size=0.2, random_state=72, stratify=y_train_30
)

# Check for extremely large values
print("Max value in X_train_30:", np.max(X_train_30))
print("Min value in X_train_30:", np.min(X_train_30))

X_train_30_scaled = scaler.fit_transform(X_train_30)

# Get the original class distribution
class_counts_30 = Counter(y_train_30)
print("Original class distribution:", class_counts_30)

# Set the desired ratio: majority class = 5 times the minority class
minority_class_size_30 = class_counts_30[min(class_counts_30, key=class_counts_30.get)]
desired_majority_size_30 = minority_class_size_30 * 5

# Create the sampling strategy dictionary
sampling_strategy_30 = {0: desired_majority_size_30, 1: minority_class_size_30}  # Adjust class labels accordingly

# Apply RandomUnderSampler with the dictionary
undersampler_30 = RandomUnderSampler(sampling_strategy=sampling_strategy_30, random_state=42)
X_resampled_30, y_resampled_30 = undersampler_30.fit_resample(X_train_30, y_train_30)

# Check the new class distribution
print("Class distribution after undersampling:", Counter(y_resampled_30))

# Apply SMOTE on the smaller subset
X_train_resampled_30, y_train_resampled_30 = smote.fit_resample(X_resampled_30, y_resampled_30)


#Verify the class distribution after SMOTE
from collections import Counter
print("Class Distribution Before SMOTE:", Counter(y_resampled_30))
print("Class Distribution After SMOTE:", Counter(y_train_resampled_30))



Max value in X_train_30: 2071000000.0
Min value in X_train_30: -9663668122.0
Original class distribution: Counter({0: 1173231, 4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 1: 1091, 12: 823, 14: 365, 9: 20, 13: 12, 8: 6})
Class distribution after undersampling: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution Before SMOTE: Counter({4: 96794, 2: 71688, 10: 50789, 3: 5760, 7: 3322, 6: 3015, 5: 2928, 11: 1802, 12: 823, 14: 365, 0: 30, 9: 20, 13: 12, 1: 6, 8: 6})
Class Distribution After SMOTE: Counter({0: 96794, 1: 96794, 2: 96794, 3: 96794, 4: 96794, 5: 96794, 6: 96794, 7: 96794, 8: 96794, 9: 96794, 10: 96794, 11: 96794, 12: 96794, 13: 96794, 14: 96794})


In [24]:
# Reshape the training and validation data to (samples, time_steps, features)
X_train_resampled_30 = X_train_resampled_30.reshape(X_train_resampled_30.shape[0], 1, 56)
X_val_30 = X_val_30.reshape(X_val_30.shape[0], 1, 56)

# Train the fine-tuned model
history = fine_tuned_model.fit(
    X_train_resampled_30,  # Features from CICIDS2017
    y_train_resampled_30,  # Labels from CICIDS2017
    validation_data=(X_val_30, y_val_30),  # Validation set
    epochs=500,  # Adjust based on the dataset size
    batch_size=2000,  # Adjust batch size as needed
    verbose=2,
    callbacks=[model_checkpoint]
)


Epoch 1/500

Epoch 1: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7482 - loss: 0.7515 - val_accuracy: 0.5703 - val_loss: 1.8377
Epoch 2/500

Epoch 2: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7525 - loss: 0.7439 - val_accuracy: 0.5656 - val_loss: 1.8479
Epoch 3/500

Epoch 3: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7561 - loss: 0.7406 - val_accuracy: 0.5654 - val_loss: 1.8432
Epoch 4/500

Epoch 4: val_accuracy did not improve from 0.60070
726/726 - 4s - 5ms/step - accuracy: 0.7564 - loss: 0.7382 - val_accuracy: 0.5667 - val_loss: 1.8379
Epoch 5/500

Epoch 5: val_accuracy did not improve from 0.60070
726/726 - 4s - 5ms/step - accuracy: 0.7555 - loss: 0.7361 - val_accuracy: 0.5669 - val_loss: 1.8440
Epoch 6/500

Epoch 6: val_accuracy did not improve from 0.60070
726/726 - 4s - 6ms/step - accuracy: 0.7534 - loss: 0.7344 - val_accuracy: 0.5559 - val_loss: 1.8373
Epoch 7/500

Epo