# Load Dataset

In [None]:
!pip install opendatasets

In [None]:
import opendatasets as od

# Download the Kaggle network-intrusion dataset with opendatasets.
# NOTE(review): presumably this asks for Kaggle credentials on first use — confirm.
od.download("https://www.kaggle.com/datasets/chethuhn/network-intrusion-dataset/data")

In [None]:
import pandas as pd

# Load the Friday afternoon DDoS capture CSV into a DataFrame.
# NOTE(review): the absolute /content/... path looks Colab-specific — adjust when running elsewhere.
df = pd.read_csv("/content/network-intrusion-dataset/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv")

In [None]:
# Preview the first rows of the loaded DataFrame.
df.head()

# Import Libraries

In [None]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.regularizers import l2
from tensorflow.keras.initializers import Constant

# Describe Data

In [None]:
# Summary statistics restricted to the object (string) columns.
df.describe(include=['O'])

In [None]:
# Summary statistics using the pandas defaults for describe().
df.describe()

In [None]:
# Count the missing values in each column.
df.isnull().sum()

In [None]:
# List the distinct target labels. The column name ' Label' starts with a space.
df[' Label'].unique()

# Imputasi

In [None]:
# Replace +inf and -inf with NaN so they count as missing values for the
# imputation steps that follow.
df.replace([np.inf, -np.inf], np.nan, inplace=True)

In [None]:
# Impute missing values in the numeric columns with the column mean.
imputer = SimpleImputer(strategy='mean')
X_numeric = df.select_dtypes(include=[np.number])
# fit_transform's output is wrapped in a new DataFrame that reuses the original column names.
X_numeric_imputed = pd.DataFrame(imputer.fit_transform(X_numeric), columns=X_numeric.columns)

In [None]:
# Fill missing values in the categorical (object) columns with the mode.
X_categorical = df.select_dtypes(include=['object'])
# The same `imputer` object is reused: set_params switches its strategy to
# 'most_frequent' in place before it is refitted on the categorical columns.
X_categorical_imputed = pd.DataFrame(imputer.set_params(strategy='most_frequent').fit_transform(X_categorical), columns=X_categorical.columns)

In [None]:
# Recombine the imputed numeric and categorical columns side by side.
df_cleaned = pd.concat([X_numeric_imputed, X_categorical_imputed], axis=1)

# Encoding

In [None]:
# Encode the target column as integers.
label_encoder = LabelEncoder()
# NOTE(review): the labels are read from the original `df`, not from `df_cleaned`.
y = label_encoder.fit_transform(df[' Label'])

In [None]:
# Separate features from the target: keep every cleaned column except ' Label'.
X = df_cleaned.drop(' Label', axis=1)

# Normalisasi

In [None]:
# 3. Normalize the features with Min-Max scaling.
# NOTE(review): the scaler is fitted on the full dataset, before the train/test
# split performed further down, so test rows influence the scaling — consider
# fitting it on the training portion only.
scaler = MinMaxScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

# SMOTE

In [None]:
# 4. SMOTE to address class imbalance.
# NOTE(review): resampling runs before the train/test split performed further
# down, so synthetic samples can end up in the test set — consider resampling
# only the training portion.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

# Seleksi fitur

In [None]:
# 5. Feature selection (optional)
# Keep the 10 best features ranked by ANOVA F-value.
selector = SelectKBest(score_func=f_classif, k=10)
# get_support() is used to index the column names so the selected features keep their labels.
X_selected = pd.DataFrame(selector.fit_transform(X_resampled, y_resampled), columns=X_scaled.columns[selector.get_support()])

# Split dataset

In [None]:
# 6. Split the dataset into training and test sets (test_size=0.3, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X_selected, y_resampled, test_size=0.3, random_state=42)

# Hasil akhir

In [None]:
# Final summary of the preprocessing steps.
# Show which integer each original label string was encoded to.
print("Mapping label ke angka:")
print(dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_))))

# Class counts of the target after SMOTE resampling.
print("Distribusi target setelah SMOTE:")
print(pd.Series(y_resampled).value_counts())

# First rows of the training features after preprocessing.
print("Dataset setelah preprocessing:")
print(X_train.head())

# Reshape data

In [None]:
# Add a trailing axis of size 1 so the arrays have shape
# (n_samples, n_features, 1) before they are fed to the CNN.
X_train_cnn = X_train.values.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test_cnn = X_test.values.reshape(X_test.shape[0], X_test.shape[1], 1)

# Verify the array shapes after reshaping.
print(X_train_cnn.shape)
print(X_test_cnn.shape)

# Build Model

In [None]:
# Fungsi untuk membangun model dengan hyperparameter yang dapat disesuaikan
def build_model(learning_rate=0.001, alpha=0.001, dropout_rate=0.3, bias_init_value=0.0,
                optimizer_choice='adam', input_shape=(100, 1), hidden_units=128, num_classes=2):
    """Build and compile a 1D-CNN classifier with adjustable hyperparameters.

    Layer stack: Conv1D(64) -> MaxPooling1D -> Conv1D(128) -> MaxPooling1D ->
    Dropout -> Flatten -> Dense(hidden_units) -> Dense(num_classes, softmax).

    Args:
        learning_rate: Learning rate passed to the optimizer.
        alpha: L2 regularization factor applied to the convolution kernels
            and to the hidden dense kernel.
        dropout_rate: Rate of the dropout layer placed before flattening.
        bias_init_value: Constant used to initialize the convolution biases.
        optimizer_choice: Either 'adam' or 'rmsprop'.
        input_shape: Shape of one input sample passed to the first Conv1D.
        hidden_units: Width of the fully connected hidden layer. Defaults to
            128, the value that used to be hard-coded.
        num_classes: Number of softmax output units. Defaults to 2, the value
            that used to be hard-coded.

    Returns:
        The compiled Sequential model (sparse categorical cross-entropy loss,
        accuracy metric).

    Raises:
        ValueError: If hidden_units < 1, num_classes < 2, or optimizer_choice
            is not 'adam' or 'rmsprop'.
    """
    # Validate the layer sizes up front so a bad value fails with a clear message.
    if hidden_units < 1:
        raise ValueError("hidden_units must be a positive integer.")
    if num_classes < 2:
        raise ValueError("num_classes must be at least 2.")

    # Pick the optimizer requested by the caller.
    if optimizer_choice == 'adam':
        optimizer = Adam(learning_rate=learning_rate)
    elif optimizer_choice == 'rmsprop':
        optimizer = RMSprop(learning_rate=learning_rate)
    else:
        raise ValueError("Optimizer must be 'adam' or 'rmsprop'.")

    model = Sequential()

    # First convolution layer with L2 regularization and a constant bias initializer.
    model.add(Conv1D(filters=64,
                     kernel_size=3,
                     activation='relu',
                     kernel_regularizer=l2(alpha),
                     input_shape=input_shape,
                     bias_initializer=Constant(value=bias_init_value)))

    # Max pooling
    model.add(MaxPooling1D(pool_size=2))

    # Second convolution layer
    model.add(Conv1D(filters=128,
                     kernel_size=3,
                     activation='relu',
                     kernel_regularizer=l2(alpha),
                     bias_initializer=Constant(value=bias_init_value)))

    # Max pooling
    model.add(MaxPooling1D(pool_size=2))

    # Dropout layer for regularization
    model.add(Dropout(rate=dropout_rate))

    # Flatten before the fully connected layers
    model.add(Flatten())

    # Fully connected hidden layer; its width is configurable via hidden_units.
    model.add(Dense(units=hidden_units, activation='relu', kernel_regularizer=l2(alpha)))

    # Output layer: one softmax unit per class.
    model.add(Dense(units=num_classes, activation='softmax'))

    # Compile the model; the sparse loss expects integer class labels.
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

# Train

In [None]:
def train_model(model, X_train_cnn, y_train, epochs=100, batch_size=32, validation_data=None):
    """Fit ``model`` on the training data and return what ``model.fit`` returns.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        X_train_cnn: Training inputs, passed positionally to ``fit``.
        y_train: Training targets, passed positionally to ``fit``.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.
        validation_data: Optional validation data forwarded unchanged.

    Returns:
        The value returned by ``model.fit``.
    """
    fit_options = {
        "epochs": epochs,
        "batch_size": batch_size,
        "validation_data": validation_data,
    }
    return model.fit(X_train_cnn, y_train, **fit_options)

# Evaluate

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
def plot_confusion_matrix(cm, class_names):
    """Draw ``cm`` as an annotated heatmap and show the figure.

    Args:
        cm: Confusion matrix of integer counts (cells are formatted with "d").
        class_names: Tick labels used on both the x and y axes.
    """
    plt.figure(figsize=(6, 6))
    heatmap_axes = sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=class_names,
        yticklabels=class_names,
    )
    # sns.heatmap returns the Axes it drew on, so label that Axes directly.
    heatmap_axes.set_title('Confusion Matrix')
    heatmap_axes.set_xlabel('Predicted')
    heatmap_axes.set_ylabel('Actual')
    plt.show()

def plot_roc_curve(y_test, y_pred_prob):
    """Plot the ROC curve for the given scores and show the figure.

    Args:
        y_test: True labels.
        y_pred_prob: Scores passed to ``roc_curve`` together with ``y_test``.
    """
    false_positive_rate, true_positive_rate, _thresholds = roc_curve(y_test, y_pred_prob)
    area_under_curve = auc(false_positive_rate, true_positive_rate)
    curve_label = 'ROC curve (area = %0.2f)' % area_under_curve

    plt.figure()
    plt.plot(false_positive_rate, true_positive_rate, color='blue', lw=2, label=curve_label)
    # Diagonal reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic')
    plt.legend(loc="lower right")
    plt.show()

In [None]:
def evaluate_model(model, X_test_cnn, y_test):
    """Evaluate ``model`` on the test data, plot diagnostics, and return metrics.

    Prints the test accuracy and loss, then shows the confusion matrix and
    the ROC curve built from the second column of the predictions.

    Args:
        model: Trained model exposing ``evaluate`` and ``predict``.
        X_test_cnn: Test inputs.
        y_test: True test labels.

    Returns:
        A tuple ``(test_acc, test_loss, precision, recall, f1, auc_roc)``.
        Accuracy comes first and loss second.
    """
    loss_value, accuracy_value = model.evaluate(X_test_cnn, y_test)
    print(f"Test accuracy: {accuracy_value}")
    print(f"Test loss: {loss_value}")

    probabilities = model.predict(X_test_cnn)
    predicted_labels = np.argmax(probabilities, axis=1)
    # The second prediction column is used as the score for AUC and the ROC curve.
    class_one_scores = probabilities[:, 1]

    precision = precision_score(y_test, predicted_labels)
    recall = recall_score(y_test, predicted_labels)
    f1 = f1_score(y_test, predicted_labels)
    auc_roc = roc_auc_score(y_test, class_one_scores)

    matrix = confusion_matrix(y_test, predicted_labels)
    plot_confusion_matrix(matrix, class_names=['Class 0', 'Class 1'])
    plot_roc_curve(y_test, class_one_scores)

    return accuracy_value, loss_value, precision, recall, f1, auc_roc

# Experiment

## Adam Optimizer

In [None]:
def print_experiment_results(history, test_loss, test_acc, precision=None, recall=None, f1=None, auc_roc=None):
    """Print the training history summary and the evaluation results.

    The extra metrics are explicit optional parameters instead of being read
    from global variables, which could be undefined or hold values left over
    from a different run.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch lists (must contain 'loss' and 'accuracy').
        test_loss: Loss measured on the test data.
        test_acc: Accuracy measured on the test data.
        precision: Optional precision; printed only when given.
        recall: Optional recall; printed only when given.
        f1: Optional F1 score; printed only when given.
        auc_roc: Optional AUC-ROC; printed only when given.
    """
    logs = history.history

    # Training history summary.
    print("\nTraining History:")
    print("Epochs:", len(logs['loss']))
    print("Training Accuracy:", logs['accuracy'][-1])
    # Printed only when a 'val_accuracy' series is present in the history.
    if 'val_accuracy' in logs:
        print("Validation Accuracy:", logs['val_accuracy'][-1])

    # Evaluation results.
    print(f"Test accuracy: {test_acc}")
    print(f"Test loss: {test_loss}")

    optional_metrics = (
        ("Precision", precision),
        ("Recall", recall),
        ("F1-Score", f1),
        ("AUC-ROC", auc_roc),
    )
    for label, value in optional_metrics:
        if value is not None:
            print(f"{label}: {value}")

In [None]:
def experiment_with_hyperparameters(learning_rate=0.001, alpha=0.001, dropout_rate=0.3, bias_init_value=0.0, optimizer_choice='adam', X_train_cnn=None, y_train=None, X_test_cnn=None, y_test=None):
    """Build, train (3 epochs) and evaluate one model, then print the results.

    Args:
        learning_rate: Learning rate forwarded to build_model.
        alpha: L2 regularization factor forwarded to build_model.
        dropout_rate: Dropout rate forwarded to build_model.
        bias_init_value: Bias initializer constant forwarded to build_model.
        optimizer_choice: Optimizer name forwarded to build_model.
        X_train_cnn: Training inputs; ``shape[1]`` sets the model input length.
        y_train: Training labels.
        X_test_cnn: Test inputs, also used as validation data during training.
        y_test: Test labels.

    Raises:
        ValueError: If any of the four data arguments is left as None.
    """
    # The data arguments default to None only to keep them keyword-friendly;
    # fail early with a clear message instead of an AttributeError on None.
    data_arguments = {
        "X_train_cnn": X_train_cnn,
        "y_train": y_train,
        "X_test_cnn": X_test_cnn,
        "y_test": y_test,
    }
    missing = [name for name, value in data_arguments.items() if value is None]
    if missing:
        raise ValueError(f"Missing required data argument(s): {', '.join(missing)}")

    # Build the model with the requested hyperparameters.
    model = build_model(learning_rate=learning_rate, alpha=alpha, dropout_rate=dropout_rate,
                        bias_init_value=bias_init_value, optimizer_choice=optimizer_choice, input_shape=(X_train_cnn.shape[1], 1))

    # Train the model on the training data.
    history = train_model(model, X_train_cnn, y_train, epochs=3, batch_size=32, validation_data=(X_test_cnn, y_test))

    # Evaluate on the test data. evaluate_model returns accuracy BEFORE loss,
    # so the first two names must be unpacked in that order.
    test_acc, test_loss, precision, recall, f1, auc_roc = evaluate_model(model, X_test_cnn, y_test)

    # Print the experiment configuration.
    print(f"Experiment with learning_rate={learning_rate}, alpha={alpha}, dropout_rate={dropout_rate}, "
          f"bias_init_value={bias_init_value}, optimizer={optimizer_choice}")

    # print_experiment_results is only handed the history, loss and accuracy
    # below, so the remaining metrics of this run are printed here.
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-Score: {f1}")
    print(f"AUC-ROC: {auc_roc}")

    # Show the training history and test loss/accuracy.
    print_experiment_results(history, test_loss, test_acc)

In [None]:
from time import time

# Hidden-node counts and learning rates to try.
hidden_nodes_list = [20, 40, 60, 80, 100]
learning_rates = [0.001, 0.005, 0.008]

# List that collects one result record per run.
results = []

# Loop over every combination of hidden nodes and learning rate.
for hidden_nodes in hidden_nodes_list:
    for lr in learning_rates:
        print(f"Testing with hidden nodes={hidden_nodes}, learning rate={lr}")

        # Helper meant to build a model for a given number of hidden nodes.
        # NOTE(review): `hidden_units` is accepted but never used in the body,
        # so hidden_nodes does not change the architecture built in this loop.
        def build_model_with_hidden_nodes(hidden_units):
            return build_model(learning_rate=lr, alpha=0.001, dropout_rate=0.3,
                               bias_init_value=0.0, optimizer_choice='adam', input_shape=(X_train_cnn.shape[1], 1))

        # Start the timer.
        start_time = time()

        # Build and train the model.
        # NOTE(review): the test split is also passed as validation data, so
        # val_acc and test_acc below are measured on the same samples.
        model = build_model_with_hidden_nodes(hidden_units=hidden_nodes)
        history = train_model(model, X_train_cnn, y_train, epochs=50, batch_size=32, validation_data=(X_test_cnn, y_test))

        # Elapsed time covers model construction and training.
        elapsed_time = time() - start_time

        # Take the accuracies recorded for the last epoch.
        train_acc = history.history['accuracy'][-1]  # Last-epoch training accuracy
        val_acc = history.history['val_accuracy'][-1]  # Last-epoch validation accuracy

        # Compute the accuracy on the test data.
        test_loss, test_acc = model.evaluate(X_test_cnn, y_test, verbose=0)  # Evaluate on the test data

        # Store the results of this run.
        results.append({
            "hidden_nodes": hidden_nodes,
            "learning_rate": lr,
            "train_acc": train_acc,
            "val_acc": val_acc,
            "test_acc": test_acc,
            "time": elapsed_time
        })

# Print the collected results as a fixed-width table.
print(f"\n{'Hidden Nodes':<15}{'Learning Rate':<15}{'Train Accuracy':<15}{'Val Accuracy':<15}{'Test Accuracy':<15}{'Time (s)':<10}")
print("="*85)

for result in results:
    print(f"{result['hidden_nodes']:<15}{result['learning_rate']:<15}{result['train_acc']:<15.4f}{result['val_acc']:<15.4f}{result['test_acc']:<15.4f}{result['time']:<10.2f}")

### Confusion Matrix, Accuracy, Precision, Recall, F1-Score, and AUC-ROC

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns

# Use the best parameters chosen from the experiments.
# NOTE(review): best_hidden_nodes is assigned but not used anywhere in this cell.
best_learning_rate = 0.001
best_hidden_nodes = 100

# Build the model with the best parameters (Adam optimizer).
best_model = build_model(learning_rate=best_learning_rate, alpha=0.001, dropout_rate=0.3,
                        bias_init_value=0.0, optimizer_choice='adam', input_shape=(X_train_cnn.shape[1], 1))

# Train the model with the best parameters.
history = train_model(best_model, X_train_cnn, y_train, epochs=50, batch_size=32, validation_data=(X_test_cnn, y_test))

# Predict on the test data; argmax over axis 1 picks the predicted class index.
y_pred = best_model.predict(X_test_cnn)
y_pred_classes = np.argmax(y_pred, axis=1)

# Compute the evaluation metrics. AUC uses the second prediction column as the score.
accuracy = accuracy_score(y_test, y_pred_classes)
precision = precision_score(y_test, y_pred_classes)
recall = recall_score(y_test, y_pred_classes)
f1 = f1_score(y_test, y_pred_classes)
auc_roc = roc_auc_score(y_test, y_pred[:, 1])

# Show the confusion matrix.
cm = confusion_matrix(y_test, y_pred_classes)
plt.figure(figsize=(6,6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=['Class 0', 'Class 1'], yticklabels=['Class 0', 'Class 1'])
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Show the ROC curve.
fpr, tpr, _ = roc_curve(y_test, y_pred[:, 1])
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='blue', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()

# Print the evaluation results.
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1}")
print(f"AUC-ROC: {auc_roc}")

## RMSprop Optimizer

In [None]:
def print_experiment_results(history, test_loss, test_acc, precision=None, recall=None, f1=None, auc_roc=None):
    """Print the training history summary and the evaluation results.

    The extra metrics are explicit optional parameters instead of being read
    from global variables, which could be undefined or hold values left over
    from a different run.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch lists (must contain 'loss' and 'accuracy').
        test_loss: Loss measured on the test data.
        test_acc: Accuracy measured on the test data.
        precision: Optional precision; printed only when given.
        recall: Optional recall; printed only when given.
        f1: Optional F1 score; printed only when given.
        auc_roc: Optional AUC-ROC; printed only when given.
    """
    logs = history.history

    # Training history summary.
    print("\nTraining History:")
    print("Epochs:", len(logs['loss']))
    print("Training Accuracy:", logs['accuracy'][-1])
    # Printed only when a 'val_accuracy' series is present in the history.
    if 'val_accuracy' in logs:
        print("Validation Accuracy:", logs['val_accuracy'][-1])

    # Evaluation results.
    print(f"Test accuracy: {test_acc}")
    print(f"Test loss: {test_loss}")

    optional_metrics = (
        ("Precision", precision),
        ("Recall", recall),
        ("F1-Score", f1),
        ("AUC-ROC", auc_roc),
    )
    for label, value in optional_metrics:
        if value is not None:
            print(f"{label}: {value}")

In [None]:
def experiment_with_hyperparameters(learning_rate=0.001, alpha=0.001, dropout_rate=0.3, bias_init_value=0.0, optimizer_choice='rmsprop', X_train_cnn=None, y_train=None, X_test_cnn=None, y_test=None):
    """Build, train (3 epochs) and evaluate one model, then print the results.

    This variant defaults to the RMSprop optimizer.

    Args:
        learning_rate: Learning rate forwarded to build_model.
        alpha: L2 regularization factor forwarded to build_model.
        dropout_rate: Dropout rate forwarded to build_model.
        bias_init_value: Bias initializer constant forwarded to build_model.
        optimizer_choice: Optimizer name forwarded to build_model.
        X_train_cnn: Training inputs; ``shape[1]`` sets the model input length.
        y_train: Training labels.
        X_test_cnn: Test inputs, also used as validation data during training.
        y_test: Test labels.

    Raises:
        ValueError: If any of the four data arguments is left as None.
    """
    # The data arguments default to None only to keep them keyword-friendly;
    # fail early with a clear message instead of an AttributeError on None.
    data_arguments = {
        "X_train_cnn": X_train_cnn,
        "y_train": y_train,
        "X_test_cnn": X_test_cnn,
        "y_test": y_test,
    }
    missing = [name for name, value in data_arguments.items() if value is None]
    if missing:
        raise ValueError(f"Missing required data argument(s): {', '.join(missing)}")

    # Build the model with the requested hyperparameters.
    model = build_model(learning_rate=learning_rate, alpha=alpha, dropout_rate=dropout_rate,
                        bias_init_value=bias_init_value, optimizer_choice=optimizer_choice, input_shape=(X_train_cnn.shape[1], 1))

    # Train the model on the training data.
    history = train_model(model, X_train_cnn, y_train, epochs=3, batch_size=32, validation_data=(X_test_cnn, y_test))

    # Evaluate on the test data. evaluate_model returns accuracy BEFORE loss,
    # so the first two names must be unpacked in that order.
    test_acc, test_loss, precision, recall, f1, auc_roc = evaluate_model(model, X_test_cnn, y_test)

    # Print the experiment configuration.
    print(f"Experiment with learning_rate={learning_rate}, alpha={alpha}, dropout_rate={dropout_rate}, "
          f"bias_init_value={bias_init_value}, optimizer={optimizer_choice}")

    # print_experiment_results is only handed the history, loss and accuracy
    # below, so the remaining metrics of this run are printed here.
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-Score: {f1}")
    print(f"AUC-ROC: {auc_roc}")

    # Show the training history and test loss/accuracy.
    print_experiment_results(history, test_loss, test_acc)

In [None]:
from time import time

# Hidden-node counts and learning rates to try.
hidden_nodes_list = [20, 40, 60, 80, 100]
learning_rates = [0.001, 0.005, 0.008]

# List that collects one result record per run.
results = []

# Loop over every combination of hidden nodes and learning rate.
for hidden_nodes in hidden_nodes_list:
    for lr in learning_rates:
        print(f"Testing with hidden nodes={hidden_nodes}, learning rate={lr}")

        # Helper meant to build a model for a given number of hidden nodes.
        # NOTE(review): `hidden_units` is accepted but never used in the body,
        # so hidden_nodes does not change the architecture built in this loop.
        def build_model_with_hidden_nodes(hidden_units):
            return build_model(learning_rate=lr, alpha=0.001, dropout_rate=0.3,
                               bias_init_value=0.0, optimizer_choice='rmsprop', input_shape=(X_train_cnn.shape[1], 1))

        # Start the timer.
        start_time = time()

        # Build and train the model.
        # NOTE(review): the test split is also passed as validation data, so
        # val_acc and test_acc below are measured on the same samples.
        model = build_model_with_hidden_nodes(hidden_units=hidden_nodes)
        history = train_model(model, X_train_cnn, y_train, epochs=50, batch_size=32, validation_data=(X_test_cnn, y_test))

        # Elapsed time covers model construction and training.
        elapsed_time = time() - start_time

        # Take the accuracies recorded for the last epoch.
        train_acc = history.history['accuracy'][-1]  # Last-epoch training accuracy
        val_acc = history.history['val_accuracy'][-1]  # Last-epoch validation accuracy

        # Compute the accuracy on the test data.
        test_loss, test_acc = model.evaluate(X_test_cnn, y_test, verbose=0)  # Evaluate on the test data

        # Store the results of this run.
        results.append({
            "hidden_nodes": hidden_nodes,
            "learning_rate": lr,
            "train_acc": train_acc,
            "val_acc": val_acc,
            "test_acc": test_acc,
            "time": elapsed_time
        })

# Print the collected results as a fixed-width table.
print(f"\n{'Hidden Nodes':<15}{'Learning Rate':<15}{'Train Accuracy':<15}{'Val Accuracy':<15}{'Test Accuracy':<15}{'Time (s)':<10}")
print("="*85)

for result in results:
    print(f"{result['hidden_nodes']:<15}{result['learning_rate']:<15}{result['train_acc']:<15.4f}{result['val_acc']:<15.4f}{result['test_acc']:<15.4f}{result['time']:<10.2f}")

### Confusion Matrix, Accuracy, Precision, Recall, F1-Score, and AUC-ROC

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns

# Use the best parameters chosen from the experiments.
# NOTE(review): best_hidden_nodes is assigned but not used anywhere in this cell.
best_learning_rate = 0.001
best_hidden_nodes = 100

# Build the model with the best parameters (RMSprop optimizer).
best_model = build_model(learning_rate=best_learning_rate, alpha=0.001, dropout_rate=0.3,
                        bias_init_value=0.0, optimizer_choice='rmsprop', input_shape=(X_train_cnn.shape[1], 1))

# Train the model with the best parameters.
history = train_model(best_model, X_train_cnn, y_train, epochs=50, batch_size=32, validation_data=(X_test_cnn, y_test))

# Predict on the test data; argmax over axis 1 picks the predicted class index.
y_pred = best_model.predict(X_test_cnn)
y_pred_classes = np.argmax(y_pred, axis=1)

# Compute the evaluation metrics. AUC uses the second prediction column as the score.
accuracy = accuracy_score(y_test, y_pred_classes)
precision = precision_score(y_test, y_pred_classes)
recall = recall_score(y_test, y_pred_classes)
f1 = f1_score(y_test, y_pred_classes)
auc_roc = roc_auc_score(y_test, y_pred[:, 1])

# Show the confusion matrix.
cm = confusion_matrix(y_test, y_pred_classes)
plt.figure(figsize=(6,6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=['Class 0', 'Class 1'], yticklabels=['Class 0', 'Class 1'])
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Show the ROC curve.
fpr, tpr, _ = roc_curve(y_test, y_pred[:, 1])
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='blue', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()

# Print the evaluation results.
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1}")
print(f"AUC-ROC: {auc_roc}")