In [None]:
# STEP 1: Install required packages
!pip install -U datasets huggingface_hub fsspec pandas scikit-learn matplotlib seaborn numpy tensorflow

In [None]:
# STEP 2: Import Libraries
from datasets import load_dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# STEP 3: Load CIC-IDS2017 Dataset (Binary)
# Fetch the "binary" configuration of the dataset and convert its "train"
# split into a pandas DataFrame for the preprocessing steps below.
dataset = load_dataset("sonnh-tech1/cic-ids-2017", "binary")
df = dataset["train"].to_pandas()

# Report the size and preview only the first rows; the bare last expression
# is rendered as a rich table instead of a plain-text dump of the frame.
print("Dataset shape:", df.shape)
df.head()

In [None]:
# STEP 4: Preprocess Data
# Encode the class labels as integers. LabelEncoder numbers the classes in
# sorted order, so the actual mapping is printed for verification
# (expected: Benign=0, Threat=1 -- confirm against the printed mapping).
# NOTE: this overwrites df['Label'] in place; re-running the cell re-encodes
# the already-encoded column, so the printed mapping is only meaningful on the
# first run after loading the data.
le = LabelEncoder()
df['Label'] = le.fit_transform(df['Label'])
print("Label mapping:", {label: code for code, label in enumerate(le.classes_.tolist())})

# Features are every column except the two label columns.
X = df.drop(['Label', 'LabelMap'], axis=1)
y = df['Label']

# Keep all numeric columns regardless of bit width (int32, float32, ...);
# listing only 'int64'/'float64' would silently discard the others.
X_numeric = X.select_dtypes(include='number')

# Drop rows containing NaN or +/-inf: StandardScaler rejects infinities and
# NaN values would otherwise propagate into the network and its loss.
finite_rows = np.isfinite(X_numeric.to_numpy(dtype='float64')).all(axis=1)
print("Rows dropped for non-finite values:", int((~finite_rows).sum()))
X_numeric = X_numeric.loc[finite_rows]
y = y.loc[finite_rows]  # keep the labels aligned with the remaining rows

# Standardize every feature to zero mean and unit variance.
# NOTE: fitting the scaler on the full dataset lets statistics of rows that
# later end up in the test split influence training; for a strict evaluation
# the scaler should be fit on the training portion only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_numeric)

In [None]:
# STEP 5: Train/Test Split
# Split the *unscaled* numeric features first and fit the scaler on the
# training portion only. Scaling the whole dataset before splitting lets the
# mean/variance of the test rows leak into training and makes the reported
# test metrics optimistic.
# stratify=y keeps the class ratio the same in the train and test splits.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_numeric, y, test_size=0.3, random_state=42, stratify=y
)

# Re-create the scaler so that it only ever sees training data, then apply
# the same (training-derived) transformation to the held-out test set.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [None]:
# STEP 6: Build the ANN Model
# Fully connected network: two ReLU hidden layers (128 and 64 units), each
# followed by 30% dropout, and one sigmoid output unit for the binary label.
n_features = X_train.shape[1]

model = Sequential([
    Dense(128, activation='relu', input_shape=(n_features,)),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),  # Binary classification
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# STEP 7: Train the Model
# Stop once the validation loss has not improved for 3 consecutive epochs and
# roll the model back to the weights of the best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# The last 20% of the training data is held out by Keras for validation.
fit_options = dict(
    validation_split=0.2,
    epochs=10,
    batch_size=256,
    callbacks=[early_stop],
    verbose=1,
)
history = model.fit(X_train, y_train, **fit_options)

In [None]:
# STEP 8: Evaluate the Model
# Turn the predicted probabilities into hard 0/1 labels with a 0.5 threshold.
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob.ravel() > 0.5).astype("int32")

# Accuracy
test_accuracy = accuracy_score(y_test, y_pred)
print("✅ Accuracy:", test_accuracy)

# Classification Report
report_text = classification_report(y_test, y_pred)
print("\n📄 Classification Report:\n", report_text)

# Confusion Matrix (rows: actual class, columns: predicted class)
conf_mat = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots()
sns.heatmap(conf_mat, annot=True, fmt='d', cmap='coolwarm', ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix - ANN')
plt.show()

In [None]:
# STEP 9: Plot Training History
# Two side-by-side panels: accuracy on the left, loss on the right, each
# showing the training curve and the validation curve per epoch.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

panels = [('accuracy', 'Accuracy'), ('loss', 'Loss')]
for ax, (metric, label) in zip(axes, panels):
    ax.plot(history.history[metric], label=f'Train {label}')
    ax.plot(history.history[f'val_{metric}'], label=f'Val {label}')
    ax.set_title(f'ANN Model {label}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(label)
    ax.legend()

fig.tight_layout()
plt.show()

In [None]:
import nbformat
import os

# Strip ipywidgets state from this notebook's metadata and save the result as
# a separate, cleaned copy. The file is read from disk, so save the notebook
# before running this cell.

# Locate the notebook file. os.path.abspath("__main__") merely resolves the
# literal name "__main__" against the working directory and never points at
# the notebook, and a kernel has no reliable way to know its own file name.
# Try the hints some front ends provide and otherwise fall back to a manually
# configured name.
NOTEBOOK_FILENAME = "ANN.ipynb"  # NOTE(review): assumed name -- set to this notebook's real file name
notebook_path = os.path.abspath(
    globals().get("__vsc_ipynb_file__")    # presumably injected by VS Code's Jupyter extension
    or os.environ.get("JPY_SESSION_NAME")  # presumably exported by recent Jupyter Server versions
    or NOTEBOOK_FILENAME
)

# Load and clean the notebook
try:
    with open(notebook_path, "r", encoding="utf-8") as f:
        nb = nbformat.read(f, as_version=4)

    # Remove problematic metadata: widget state is stored under the
    # notebook-level "widgets" key; cell-level entries are removed as well in
    # case any exist.
    nb.get("metadata", {}).pop("widgets", None)
    for cell in nb.cells:
        cell.get("metadata", {}).pop("widgets", None)

    # Save cleaned notebook under a different name so the original is kept.
    cleaned_notebook_path = "ANN_cleaned.ipynb"
    with open(cleaned_notebook_path, "w", encoding="utf-8") as f:
        nbformat.write(nb, f)
    print(f"Cleaned notebook saved to {cleaned_notebook_path}")

except FileNotFoundError:
    print(f"Error: The notebook file '{notebook_path}' was not found.")
except Exception as e:
    # Best-effort utility cell: report the problem instead of aborting the run.
    print(f"An error occurred: {e}")