In [1]:
pip install tabulate



In [3]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.neighbors import NearestNeighbors
from tabulate import tabulate
import time

class OptimizedModel:
    def __init__(self):
        self.scaler = MinMaxScaler()
        self.svd = TruncatedSVD(n_components=3)
        self.mudring_svm = None
        self.columns = None
        self.selected_features_ = None

    def fit(self, X, y):
        start_time = time.time()
        print("Starting data preprocessing...")

        # Preprocess data
        X = pd.get_dummies(X)
        self.columns = X.columns
        X.fillna(X.mean(), inplace=True)
        print("Data preprocessing completed. Starting normalization...")

        X_scaled = self.scaler.fit_transform(X)
        print("Normalization completed. Starting feature extraction...")

        X_svd = self.svd.fit_transform(X_scaled)
        print("Feature extraction completed. Starting feature selection...")

        self.selected_features_ = self.northern_goshawk(X_svd, y)
        print(f"Selected features: {self.selected_features_}")
        print("Feature selection completed. Starting data balancing...")

        X_resampled, y_resampled = self.fast_asmote(X_svd[:, self.selected_features_], y, target_class='anomaly')
        print("Data balancing completed. Starting training...")

        best_params = self.mudring_optimization(X_resampled, y_resampled)
        self.mudring_svm = self.mudring_svm_train(X_resampled, y_resampled, best_params)

        training_time = time.time() - start_time
        print(f"Training completed in {training_time:.2f} seconds")

    def northern_goshawk(self, X, y):
        print("Starting optimized Northern Goshawk...")

        # Optimized parameters
        pop_size = 5  # Reduced population size
        feature_size = X.shape[1]
        max_iter = 3  # Reduced iterations

        def objective_function(solution):
            if np.sum(solution) == 0:
                return 0.0

            selected_features = X[:, solution == 1]
            # Use a faster SVM configuration for evaluation
            svm = SVC(kernel='linear', C=1.0, max_iter=1000)
            try:
                # Use a subset of data for faster evaluation
                sample_size = min(1000, len(X))
                idx = np.random.choice(len(X), sample_size, replace=False)
                score = svm.fit(selected_features[idx], y[idx]).score(selected_features[idx], y[idx])
                return score
            except:
                return 0.0

        # Initialize population
        population = np.random.randint(2, size=(pop_size, feature_size))
        for i in range(pop_size):
            if np.sum(population[i]) == 0:
                population[i][np.random.randint(feature_size)] = 1

        best_solution = population[0]
        best_fitness = objective_function(best_solution)

        for iteration in range(max_iter):
            print(f"NGO Iteration {iteration + 1}/{max_iter}")

            for i in range(pop_size):
                new_solution = population[i].copy()
                flip_idx = np.random.choice(feature_size, size=max(1, feature_size//10))
                new_solution[flip_idx] = 1 - new_solution[flip_idx]

                if np.sum(new_solution) == 0:
                    new_solution[np.random.randint(feature_size)] = 1

                new_fitness = objective_function(new_solution)

                if new_fitness > best_fitness:
                    best_fitness = new_fitness
                    best_solution = new_solution

        selected_features = np.where(best_solution == 1)[0]
        if len(selected_features) == 0:
            selected_features = np.array([0, 1, 2])

        return selected_features

    def fast_asmote(self, X, y, target_class='anomaly', k=3):
        """Optimized ASMOTE implementation"""
        print("Starting optimized ASMOTE...")

        X, y = np.array(X), np.array(y)
        minority_mask = y == target_class
        X_minority = X[minority_mask]

        if len(X_minority) == 0:
            return X, y

        # Calculate required synthetic samples
        n_minority = len(X_minority)
        n_synthetic = min(len(X) - n_minority, n_minority * 2)  # Limited synthetic samples

        nn = NearestNeighbors(n_neighbors=k+1, n_jobs=-1)  # Parallel processing
        nn.fit(X_minority)

        synthetic_samples = []
        indices = np.random.choice(len(X_minority), min(n_synthetic, 1000))  # Limit iterations

        for i in indices:
            neighbors = nn.kneighbors([X_minority[i]], return_distance=False)[0][1:]
            neighbor_idx = np.random.choice(neighbors)
            gap = np.random.rand()
            synthetic_sample = X_minority[i] + gap * (X_minority[neighbor_idx] - X_minority[i])
            synthetic_samples.append(synthetic_sample)

        if synthetic_samples:
            X = np.vstack([X, synthetic_samples])
            y = np.concatenate([y, [target_class] * len(synthetic_samples)])

        return X, y

    def mudring_svm_train(self, X, y, params):
        """Fast SVM training with optimized parameters"""
        svm_model = SVC(
            kernel=params['kernel'],
            C=params['C'],
            gamma=params['gamma'],
            max_iter=2000,  # Limited iterations
            cache_size=500  # Increased cache for speed
        )
        svm_model.fit(X, y)
        return svm_model

    def mudring_optimization(self, X, y):
        """Simplified parameter selection"""
        return {
            'kernel': 'rbf',
            'C': 1.0,
            'gamma': 'scale'
        }

    def predict(self, X):
        X = pd.get_dummies(X)
        X = X.reindex(columns=self.columns, fill_value=0)
        X.fillna(X.mean(), inplace=True)
        X_scaled = self.scaler.transform(X)
        X_svd = self.svd.transform(X_scaled)
        return self.mudring_svm.predict(X_svd[:, self.selected_features_])

def main():
    start_time = time.time()

    print("Loading data...")
    train = pd.read_csv('/content/UNSW_NB15_training-set.csv')
    test = pd.read_csv('/content/UNSW_NB15_testing-set.csv')

    # Prepare data
    X_train = train.drop(['attack_cat'], axis=1)
    y_train = train['attack_cat']

    if 'attack_cat' in test.columns:
        X_test = test.drop(['attack_cat'], axis=1)
        y_test = test['attack_cat']
    else:
        X_test = test
        y_test = None

    print("\nTraining model...")
    model = OptimizedModel()
    model.fit(X_train, y_train)

    print("\nMaking predictions...")
    if y_test is not None:
        y_pred = model.predict(X_test)
        results = [
            ["Accuracy", f"{accuracy_score(y_test, y_pred):.4f}"],
            ["Classification Report", "\n" + classification_report(y_test, y_pred)]
        ]
        print("\nResults:")
        print(tabulate(results, headers=["Metric", "Value"]))
    else:
        print("Predictions:", model.predict(X_test))

    total_time = time.time() - start_time
    print(f"\nTotal execution time: {total_time:.2f} seconds")

if __name__ == "__main__":
    main()

Loading data...

Training model...
Starting data preprocessing...
Data preprocessing completed. Starting normalization...
Normalization completed. Starting feature extraction...
Feature extraction completed. Starting feature selection...
Starting optimized Northern Goshawk...
NGO Iteration 1/3
NGO Iteration 2/3
NGO Iteration 3/3
Selected features: [0 1 2]
Feature selection completed. Starting data balancing...
Starting optimized ASMOTE...
Data balancing completed. Starting training...




Training completed in 137.73 seconds

Making predictions...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Results:
Metric                 Value
---------------------  -------------------------------------------------------
Accuracy               0.7657
Classification Report  precision    recall  f1-score   support

                             Analysis       0.00      0.00      0.00       677
                             Backdoor       0.00      0.00      0.00       583
                                  DoS       0.22      0.02      0.03      4089
                             Exploits       0.78      0.25      0.38     11132
                              Fuzzers       0.41      0.61      0.49      6062
                              Generic       1.00      0.96      0.98     18871
                               Normal       0.92      0.99      0.96     37000
                       Reconnaissance       0.14      0.45      0.21      3496
                            Shellcode       0.00      0.00      0.00       378
                                Worms       0.00      0.00      0.00        4

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
