In [None]:
!pip install gdown

import gdown
import pandas as pd
import numpy as np

# Google Drive file IDs for the two CSVs
train_id = "1ZR4cnzoT4TA9uH8xeA4Pk_0jGZtBtciN"
test_id = "1Xt9wsLd2mWRONLjzT2wNdRIaFkVS_6q3"

# Fetch the training file first, then the test file, with gdown output enabled
for _file_id, _csv_name in (
    (train_id, "UNSW_FEIIDS_train.csv"),
    (test_id, "UNSW_FEIIDS_test.csv"),
):
    gdown.download(f"https://drive.google.com/uc?id={_file_id}", _csv_name, quiet=False)

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import time
import os


# =====================================================================
# GPU OPTIMIZATION SETTINGS
# =====================================================================
# Turn on memory growth for every visible GPU
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Report the failure and carry on with TensorFlow's default allocation
        print(e)
    else:
        print(f"GPU memory growth enabled for {len(gpus)} GPU(s)")

# Turn on XLA (Accelerated Linear Algebra) JIT compilation
tf.config.optimizer.set_jit(True)

# When a GPU is present, make mixed_float16 the global Keras dtype policy
from tensorflow.keras import mixed_precision
if gpus:
    policy = mixed_precision.Policy('mixed_float16')
    mixed_precision.set_global_policy(policy)
    print(f'Mixed precision enabled: Compute dtype={policy.compute_dtype}, '
          f'variable dtype={policy.variable_dtype}')

# Reduce TensorFlow log output
# NOTE(review): TF_CPP_MIN_LOG_LEVEL is assigned here after `import tensorflow`
# has already executed in this cell - confirm it still has the intended effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
tf.get_logger().setLevel('ERROR')
# =====================================================================


# Output locations for saved plots and saved models
PLOTS_FOLDER, MODELS_FOLDER = "bilstm_plots", "bilstm_models"

# Create both directories up front; an already-existing directory is not an error
for _output_dir in (PLOTS_FOLDER, MODELS_FOLDER):
    os.makedirs(_output_dir, exist_ok=True)


# Detect runtime environment
def get_runtime_environment():
    """Pick the device to train on and build a matching distribution strategy.

    A TPU is tried first: the cluster is resolved, connected to and
    initialised. If any of those steps fails, the function falls back to the
    first GPU when one is visible, and to the CPU otherwise.

    Returns:
        tuple: ``(name, strategy)`` where ``name`` is ``"TPU"``, ``"GPU"`` or
        ``"CPU"`` and ``strategy`` is a ``tf.distribute.TPUStrategy`` or a
        ``tf.distribute.OneDeviceStrategy`` bound to ``/gpu:0`` or ``/cpu:0``.
    """
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        return "TPU", tf.distribute.TPUStrategy(tpu)
    except Exception:
        # Best-effort probe: any ordinary failure means "no usable TPU".
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        pass

    if tf.config.list_physical_devices('GPU'):
        return "GPU", tf.distribute.OneDeviceStrategy("/gpu:0")
    return "CPU", tf.distribute.OneDeviceStrategy("/cpu:0")


# Optimized callback - evaluate less frequently to reduce overhead
class TestEvaluationCallback(tf.keras.callbacks.Callback):
    """Keras callback that periodically evaluates the model on held-out data.

    The model is evaluated after the first epoch, after every
    ``eval_every``-th epoch, and once more when training ends if the last
    completed epoch has not been evaluated yet. The final check does not
    depend on the configured epoch count, so it also covers runs that stop
    early.

    Args:
        test_data: ``(X_test, y_test)`` pair passed to ``model.evaluate``.
        eval_every: Evaluate every this many epochs.

    Attributes populated during training (parallel lists):
        epoch_nums: 1-based epoch numbers at which an evaluation ran.
        test_loss: Loss returned by each evaluation.
        test_accuracy: Second value returned by each evaluation.
    """

    def __init__(self, test_data, eval_every=10):
        super().__init__()
        self.test_data = test_data
        self.test_loss = []
        self.test_accuracy = []
        self.eval_every = eval_every
        self.epoch_nums = []
        # 1-based number of the most recently completed epoch (None before training)
        self._last_epoch = None

    def _evaluate(self, epoch_num):
        """Evaluate on the held-out data and record the result for ``epoch_num``."""
        X_test, y_test = self.test_data
        # Unpacking assumes the model was compiled with exactly one metric,
        # so evaluate() returns [loss, metric].
        test_loss, test_acc = self.model.evaluate(X_test, y_test, verbose=0)
        self.test_loss.append(test_loss)
        self.test_accuracy.append(test_acc)
        self.epoch_nums.append(epoch_num)

    def on_epoch_end(self, epoch, logs=None):
        epoch_num = epoch + 1
        self._last_epoch = epoch_num
        # Only evaluate every N epochs (plus the very first) to reduce overhead
        if epoch == 0 or epoch_num % self.eval_every == 0:
            self._evaluate(epoch_num)

    def on_train_end(self, logs=None):
        # Make sure the final epoch is always recorded, whatever the epoch
        # count was and whether or not training stopped early.
        if self._last_epoch is None:
            return
        if not self.epoch_nums or self.epoch_nums[-1] != self._last_epoch:
            self._evaluate(self._last_epoch)


def plot_training_history(history, test_callback, environment, hidden_nodes, save_folder):
    """Save an accuracy figure and a loss figure for one training run.

    Each figure overlays the per-epoch training curve from ``history`` with
    the periodic test-set evaluations recorded by ``test_callback`` (labelled
    "Validation" in the legend). Files are written into ``save_folder`` as
    ``{environment}_{hidden_nodes}_acc.png`` and
    ``{environment}_{hidden_nodes}_loss.png``.
    """
    epoch_axis = range(1, len(history.history['accuracy']) + 1)

    # (history key, callback series, display name, filename suffix);
    # the accuracy figure is produced first, then the loss figure.
    figures = (
        ('accuracy', test_callback.test_accuracy, 'Accuracy', 'acc'),
        ('loss', test_callback.test_loss, 'Loss', 'loss'),
    )

    for history_key, test_series, display_name, suffix in figures:
        plt.figure(figsize=(10, 6))
        plt.plot(epoch_axis, history.history[history_key], 'b-',
                 label=f'Training {display_name}', linewidth=2, alpha=0.7)
        plt.plot(test_callback.epoch_nums, test_series, 'ro-',
                 label=f'Validation {display_name}', linewidth=2, markersize=4)
        plt.title(f'BiLSTM Training & Validation {display_name}\n{environment} - {hidden_nodes} Hidden Nodes',
                  fontsize=14, fontweight='bold')
        plt.xlabel('Epoch', fontsize=12)
        plt.ylabel(display_name, fontsize=12)
        plt.legend(fontsize=11)
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        target_path = os.path.join(save_folder, f"{environment}_{hidden_nodes}_{suffix}.png")
        plt.savefig(target_path, dpi=150)
        plt.close()


# =====================================================================
# CONFIGURATION - CHANGE THIS FOR EACH RUN
# =====================================================================
HIDDEN_NODES = 40  # Change to: 10, 20, 30, 40, 50, 60, 70, 80
# =====================================================================

# Resolve the device/strategy once and announce the run settings
ENVIRONMENT, strategy = get_runtime_environment()
print(
    f"\n{'='*70}",
    f"Running on: {ENVIRONMENT}",
    f"Hidden Nodes: {HIDDEN_NODES}",
    f"{'='*70}\n",
    sep="\n",
)


# Load normalized data
print("Loading data...")
train_df = pd.read_csv("UNSW_FEIIDS_train.csv")
test_df = pd.read_csv("UNSW_FEIIDS_test.csv")


# Identify and separate labels
label_cols = ['attack_cat', 'binary_label', 'Label', 'label']
existing_labels = [col for col in label_cols if col in train_df.columns]


def _target_column(df, split_name):
    """Return the target column of *df*: 'binary_label' if present, else 'label'."""
    for candidate in ('binary_label', 'label'):
        if candidate in df.columns:
            return candidate
    raise KeyError(f"{split_name} CSV has neither a 'binary_label' nor a 'label' column")


# Drop every known label column from both frames. Using the full label_cols
# list (not only those found in the training frame) also removes a label
# column that appears in the test CSV alone.
X_full = train_df.drop(columns=label_cols, errors='ignore').values
y_full = train_df[_target_column(train_df, "train")].values
X_test_full = test_df.drop(columns=label_cols, errors='ignore').values
y_test_full = test_df[_target_column(test_df, "test")].values

# Fail early with a clear message instead of a shape error at predict time
if X_full.shape[1] != X_test_full.shape[1]:
    raise ValueError(
        f"Feature count mismatch: train has {X_full.shape[1]} columns, "
        f"test has {X_test_full.shape[1]}"
    )


print(f"Dataset: {len(X_full):,} train | {len(X_test_full):,} test | {X_full.shape[1]} features")


# Sample 14,000 for training and 3,500 for testing (as per Table 6)
TRAIN_SAMPLES = 14000
TEST_SAMPLES = 3500

# Sampling without replacement cannot draw more rows than exist, so clamp the
# sizes and say so rather than crashing inside np.random.choice.
n_train = min(TRAIN_SAMPLES, len(X_full))
n_test = min(TEST_SAMPLES, len(X_test_full))
if (n_train, n_test) != (TRAIN_SAMPLES, TEST_SAMPLES):
    print(f"Warning: requested {TRAIN_SAMPLES:,}/{TEST_SAMPLES:,} samples but only "
          f"{n_train:,}/{n_test:,} are available; using all available rows")

# Same seed and same call order as before, so full-size runs pick the same rows
np.random.seed(42)
train_indices = np.random.choice(len(X_full), n_train, replace=False)
test_indices = np.random.choice(len(X_test_full), n_test, replace=False)


# Create train and test sets
X_train = X_full[train_indices]
y_train = y_full[train_indices]
X_test = X_test_full[test_indices]
y_test = y_test_full[test_indices]


# Reshape for LSTM: (samples, timesteps=1, features)
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])


# Convert to float32 for better GPU performance
X_train = X_train.astype('float32')
y_train = y_train.astype('float32')
X_test = X_test.astype('float32')
y_test = y_test.astype('float32')

print(f"Prepared: {X_train.shape[0]:,} train | {X_test.shape[0]:,} test\n")


# Configuration - OPTIMIZED FOR GPU
EPOCHS = 200
BATCH_SIZE = 128  # Increased from 64 to better utilize GPU
results_file = "bilstm_table_results.csv"


# Train single model
print(f"{'='*70}")
print(f"Training BiLSTM with {HIDDEN_NODES} hidden nodes on {ENVIRONMENT}")
print(f"{'='*70}\n")

with strategy.scope():
    # Build BiLSTM model (2 layers as per paper)
    model = Sequential([
        Bidirectional(LSTM(HIDDEN_NODES,
                          return_sequences=True,
                          activation='tanh',  # Explicit for CuDNN
                          recurrent_activation='sigmoid'),  # Explicit for CuDNN
                     input_shape=(1, X_train.shape[2])),
        Bidirectional(LSTM(HIDDEN_NODES,
                          return_sequences=False,
                          activation='tanh',
                          recurrent_activation='sigmoid')),
        Dense(1, activation='sigmoid', dtype='float32')  # Explicit output dtype
    ])

    # Adam with learning rate 0.001 (this is Adam's default value, stated explicitly)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

print("Model built. Starting training...\n")

# Create optimized callback (evaluate every 10 epochs instead of every epoch)
test_callback = TestEvaluationCallback((X_test, y_test), eval_every=10)

# Create TensorFlow dataset for better GPU utilization.
# - The shuffle buffer covers the whole training set, giving a full reshuffle
#   each epoch instead of a 1024-element sliding window.
# - drop_remainder keeps every batch at BATCH_SIZE, so each epoch runs
#   len(X_train) // BATCH_SIZE steps. It is switched off if the training set is
#   smaller than one batch, which would otherwise leave the dataset empty.
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=len(X_train))
train_dataset = train_dataset.batch(BATCH_SIZE, drop_remainder=len(X_train) >= BATCH_SIZE)
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)  # Prefetch for GPU

# Train model.
# steps_per_epoch is intentionally not passed: the dataset is finite and not
# repeated, so each epoch should simply run until the dataset is exhausted.
# Passing a step count that differs from the dataset's batch count makes Keras
# keep consuming the same iterator across epochs until the input runs out.
start_train = time.time()
history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    verbose=2,  # Less verbose output
    callbacks=[test_callback]
)
train_time = time.time() - start_train
train_time = time.time() - start_train

# Plot training history
print(f"\nGenerating plots for {HIDDEN_NODES} hidden nodes...")
plot_training_history(history, test_callback, ENVIRONMENT, HIDDEN_NODES, PLOTS_FOLDER)
print("✓ Plots saved\n")

# Test model (final evaluation); only the predict() call is timed
start_test = time.time()
y_pred_proba = model.predict(X_test, batch_size=BATCH_SIZE, verbose=0)
test_time = time.time() - start_test

# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions
y_pred = (y_pred_proba > 0.5).astype(int).flatten()

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred)
test_loss = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE, verbose=0)[0]

# Store result
result = {
    'Environment': ENVIRONMENT,
    'Hidden_Nodes': HIDDEN_NODES,
    'Accuracy': round(accuracy, 4),
    'Train_Time': round(train_time, 2),
    'Test_Time': round(test_time, 2),
    'Test_Loss': round(test_loss, 4)
}

# Save model
model_filename = os.path.join(MODELS_FOLDER, f"bilstm_{ENVIRONMENT}_{HIDDEN_NODES}nodes.keras")
model.save(model_filename)
print(f"Model saved: {model_filename}\n")

# Save to CSV: append this run to any earlier results, then rewrite the file
result_df = pd.DataFrame([result])
if os.path.exists(results_file):
    existing_df = pd.read_csv(results_file)
    combined_df = pd.concat([existing_df, result_df], ignore_index=True)
else:
    combined_df = result_df
combined_df.to_csv(results_file, index=False)

print(f"{'='*70}")
print(f"RESULTS for {HIDDEN_NODES} hidden nodes:")
print(f"{'='*70}")
print(f"Accuracy: {accuracy*100:.2f}%")
print(f"Train Time: {train_time:.2f}s")
print(f"Test Time: {test_time:.2f}s")
print(f"Test Loss: {test_loss:.4f}")
print(f"\nSaved to: {results_file}")
print(f"{'='*70}\n")

# Display current results. The file was written just above, so it is read
# back unconditionally.
final_results = pd.read_csv(results_file)
print("All Results So Far:")
print(final_results)
