In [2]:
import numpy as np
from scipy.linalg import pinv
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import joblib
import pandas as pd

In [23]:
class UltraEfficientKELM(BaseEstimator, ClassifierMixin):
    def __init__(self, C=1.0, kernel='rbf', gamma=None, n_landmarks=500, 
                 batch_size=1000, random_state=42):
        """
        Ultra memory-efficient KELM that:
        - Never stores full kernel matrices
        - Uses iterative solving
        - Processes data in micro-batches
        
        Parameters:
        -----------
        n_landmarks : int (500-2000 recommended)
            Controls accuracy/memory tradeoff
        batch_size : int
            Samples processed at once (reduce if memory constrained)
        """
        self.C = C
        self.kernel = kernel
        self.gamma = gamma
        self.n_landmarks = n_landmarks
        self.batch_size = batch_size
        self.random_state = random_state
        self.landmarks = None
        self.beta = None
        self.classes_ = None

    def _rbf_kernel(self, X, Y):
        """Memory-efficient RBF kernel computation"""
        X_sq = np.sum(X**2, axis=1)
        Y_sq = np.sum(Y**2, axis=1)
        XY = np.dot(X, Y.T)
        return np.exp(-self.gamma * (X_sq[:, None] + Y_sq[None, :] - 2*XY))

    def _select_landmarks(self, X, n):
        """Select landmarks using random sampling"""
        np.random.seed(self.random_state)
        indices = np.random.choice(X.shape[0], size=min(n, X.shape[0]), replace=False)
        return X[indices]

    def _solve_iteratively(self, K_landmarks, X, y_binary):
        """Solve for beta without storing full K_train"""
        # Initialize
        n_samples = X.shape[0]
        n_landmarks = K_landmarks.shape[0]
        A = K_landmarks + np.eye(n_landmarks)/self.C
        b = np.zeros(n_landmarks)
        
        # Process in micro-batches
        for i in tqdm(range(0, n_samples, self.batch_size), 
                     desc="Building solution", unit="batch"):
            batch = X[i:i+self.batch_size]
            K_batch = self._rbf_kernel(batch, self.landmarks)
            b += K_batch.T @ y_binary[i:i+self.batch_size]
        
        # Final solve
        return pinv(A) @ b

    def fit(self, X, y):
        """Fit model using iterative landmark approximation"""
        # Initialize
        if self.gamma is None:
            self.gamma = 1.0 / X.shape[1]
        self.classes_ = np.unique(y)
        
        # Select landmarks
        self.landmarks = self._select_landmarks(X, self.n_landmarks)
        K_landmarks = self._rbf_kernel(self.landmarks, self.landmarks)
        
        # Binary or multiclass
        if len(self.classes_) == 2:
            y_binary = np.where(y == self.classes_[1], 1, -1)
            self.beta = self._solve_iteratively(K_landmarks, X, y_binary)
        else:
            self.beta = []
            for cls in tqdm(self.classes_, desc="Classes"):
                y_binary = np.where(y == cls, 1, -1)
                beta_cls = self._solve_iteratively(K_landmarks, X, y_binary)
                self.beta.append(beta_cls)

    def predict(self, X):
        """Predict in batches"""
        predictions = []
        for i in tqdm(range(0, X.shape[0], self.batch_size), 
                     desc="Predicting", unit="batch"):
            batch = X[i:i+self.batch_size]
            K_batch = self._rbf_kernel(batch, self.landmarks)
            
            if len(self.classes_) == 2:
                decisions = K_batch @ self.beta
                preds = np.where(decisions > 0, self.classes_[1], self.classes_[0])
            else:
                decisions = np.array([K_batch @ b for b in self.beta])
                preds = self.classes_[np.argmax(decisions, axis=0)]
            
            predictions.append(preds)
        
        return np.concatenate(predictions)


In [4]:
train = pd.read_csv("../DATA/train.csv")
train = train.dropna()
train['Protocol'] = pd.to_numeric(train['Protocol'], errors='coerce')
train = train.dropna(subset=['Protocol'])
train['Protocol'] = train['Protocol'].astype(int)

  train = pd.read_csv("../DATA/train.csv")


In [5]:
y = train['Label'].values

In [6]:
train=None

In [8]:
X_transformed = np.load("../FEATURE_EXTRACTION/transformed_features.npy")

In [9]:
print(f"Transformed features shape: {X_transformed.shape}")
print(f"Labels shape: {y.shape}")

Transformed features shape: (12174707, 32)
Labels shape: (12174707,)


In [11]:
np.unique(y)

array(['Benign', 'Bot', 'Brute Force -Web', 'Brute Force -XSS',
       'DDOS attack-HOIC', 'DDOS attack-LOIC-UDP',
       'DDoS attacks-LOIC-HTTP', 'DoS attacks-GoldenEye',
       'DoS attacks-Hulk', 'DoS attacks-SlowHTTPTest',
       'DoS attacks-Slowloris', 'FTP-BruteForce', 'Infilteration',
       'SQL Injection', 'SSH-Bruteforce'], dtype=object)

In [12]:
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# 7. Save your label encoder for later use (if you haven't already)
joblib.dump(le, 'label_encoder.pkl')

['label_encoder.pkl']

In [16]:
X_train, X_test, y_train, y_test = train_test_split(
        X_transformed, y_encoded, test_size=0.2, random_state=42
    )

In [21]:
from tqdm import tqdm

In [24]:
kelm = UltraEfficientKELM(
        C=1.0,
        kernel='rbf',
        gamma=0.1,
        n_landmarks=500,  # Start small, increase if memory allows
        batch_size=500    # Reduce if memory errors persist
    )
kelm.fit(X_train, y_train)

Building solution: 100%|██████████| 19480/19480 [00:25<00:00, 776.14batch/s]
Building solution: 100%|██████████| 19480/19480 [00:24<00:00, 788.37batch/s]
Building solution: 100%|██████████| 19480/19480 [00:26<00:00, 736.16batch/s]
Building solution: 100%|██████████| 19480/19480 [00:24<00:00, 782.50batch/s]
Building solution: 100%|██████████| 19480/19480 [00:24<00:00, 785.52batch/s]
Building solution: 100%|██████████| 19480/19480 [00:25<00:00, 763.34batch/s]
Building solution: 100%|██████████| 19480/19480 [00:24<00:00, 781.71batch/s]
Building solution: 100%|██████████| 19480/19480 [00:24<00:00, 808.11batch/s]
Building solution: 100%|██████████| 19480/19480 [00:25<00:00, 756.29batch/s]
Building solution: 100%|██████████| 19480/19480 [00:24<00:00, 787.75batch/s]
Building solution: 100%|██████████| 19480/19480 [00:25<00:00, 769.18batch/s]
Building solution: 100%|██████████| 19480/19480 [00:24<00:00, 791.62batch/s]
Building solution: 100%|██████████| 19480/19480 [00:26<00:00, 746.16batch/s]

In [25]:
train_acc = kelm.score(X_train, y_train)


Predicting: 100%|██████████| 19480/19480 [01:15<00:00, 258.90batch/s]


In [26]:
def calculate_accuracy(model, X, y_true, batch_size=1000):
    correct = 0
    total = len(y_true)
    # Process in batches
    for i in tqdm(range(0, total, batch_size), desc="Calculating accuracy"):
        batch_X = X[i:i+batch_size]
        batch_y = y_true[i:i+batch_size]
        # Get predictions for current batch
        preds = model.predict(batch_X)
        # Count correct predictions
        correct += np.sum(preds == batch_y)
    return correct / total

In [28]:
train_acc = calculate_accuracy(kelm, X_train[:len(X_train)//10], y_train[:len(y_train)//10])
print(f"Training Accuracy: {train_acc:.4f}")

Predicting: 100%|██████████| 2/2 [00:00<00:00, 132.86batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 245.76batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 154.21batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 248.26batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 254.83batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 198.07batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 185.10batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 226.18batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 252.19batch/s].02it/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 231.35batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 231.72batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 83.79batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 101.57batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 264.96batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 269.75batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 276.10batch/s]
P

Training Accuracy: 0.8310





In [29]:
test_acc = calculate_accuracy(kelm, X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")

Predicting: 100%|██████████| 2/2 [00:00<00:00, 263.08batch/s]]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 219.60batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 285.30batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 238.72batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 246.18batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 50.99batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 228.87batch/s]5.23it/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 255.22batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 198.61batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 285.47batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 282.01batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 213.58batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 287.01batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 284.07batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 249.88batch/s]
Predicting: 100%|██████████| 2/2 [00:00<00:00, 278.96batch/s]

Test Accuracy: 0.8307





In [31]:
print("\nClass-wise Accuracy:")
for cls in kelm.classes_:
        cls_mask = y_test == cls
        cls_acc = np.mean(kelm.predict(X_test[cls_mask]) == y_test[cls_mask])
        print(f"Class {cls}: {cls_acc:.4f}")


Class-wise Accuracy:


Predicting:   1%|▏         | 60/4046 [00:00<00:13, 289.71batch/s]

Predicting: 100%|██████████| 4046/4046 [00:15<00:00, 259.28batch/s]


Class 0: 1.0000


Predicting: 100%|██████████| 86/86 [00:00<00:00, 260.20batch/s]


Class 1: 0.0000


Predicting: 100%|██████████| 1/1 [00:00<00:00, 889.38batch/s]


Class 2: 0.0000


Predicting: 100%|██████████| 1/1 [00:00<00:00, 1637.76batch/s]


Class 3: 0.0000


Predicting: 100%|██████████| 206/206 [00:00<00:00, 228.21batch/s]


Class 4: 0.0000


Predicting: 100%|██████████| 1/1 [00:00<00:00, 499.14batch/s]


Class 5: 0.0000


Predicting: 100%|██████████| 173/173 [00:00<00:00, 287.39batch/s]


Class 6: 0.0000


Predicting: 100%|██████████| 13/13 [00:00<00:00, 261.90batch/s]


Class 7: 0.0000


Predicting: 100%|██████████| 140/140 [00:00<00:00, 233.46batch/s]


Class 8: 0.0000


Predicting: 100%|██████████| 43/43 [00:00<00:00, 292.93batch/s]


Class 9: 0.0000


Predicting: 100%|██████████| 4/4 [00:00<00:00, 348.18batch/s]


Class 10: 0.0000


Predicting: 100%|██████████| 59/59 [00:00<00:00, 272.92batch/s]


Class 11: 0.0000


Predicting: 100%|██████████| 49/49 [00:00<00:00, 288.18batch/s]


Class 12: 0.0000


Predicting: 100%|██████████| 1/1 [00:00<00:00, 1949.03batch/s]

Class 13: 0.0000



Predicting: 100%|██████████| 57/57 [00:00<00:00, 295.37batch/s]

Class 14: 0.0000





In [27]:
joblib.dump(kelm, 'memory_efficient_kelm.pkl')
joblib.dump(le, 'label_encoder.pkl')

['label_encoder.pkl']