In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import xgboost as xgb

# Load and clean data
data = pd.read_csv('/kaggle/input/afterlightgbm/lightGBMplustarget.csv')
data = data.replace([np.inf, -np.inf], np.nan).dropna()
for col in data.columns:
    data[col] = pd.to_numeric(data[col], errors='coerce')
data = data.dropna().clip(lower=-1e10, upper=1e10).reset_index(drop=True)

# Features and labels
X = data.drop(columns=['Attack_Num']).astype(np.float32)
y = data['Attack_Num']

# Split dataset
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Convert to DMatrix
dtrain = xgb.DMatrix(X_train, label=y_train)
dval = xgb.DMatrix(X_val, label=y_val)

# Parameters with GPU support
params = {
    'objective': 'multi:softmax',
    'num_class': len(np.unique(y)),
    'tree_method': 'gpu_hist',
    'predictor': 'gpu_predictor',
    'eval_metric': 'mlogloss',
    'learning_rate': 0.01,
    'max_depth': 10,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'lambda': 1,
    'alpha': 0.5,
    'random_state': 42
}

# Store training history
evals_result = {}
evals = [(dtrain, 'train'), (dval, 'eval')]

# Train model with early stopping
model = xgb.train(
    params=params,
    dtrain=dtrain,
    num_boost_round=3000,
    evals=evals,
    early_stopping_rounds=50,
    evals_result=evals_result,
    verbose_eval=100
)

# Predict on train and val sets
y_train_pred = model.predict(dtrain)
y_val_pred = model.predict(dval)

# Evaluate
train_acc = accuracy_score(y_train, y_train_pred)
val_acc = accuracy_score(y_val, y_val_pred)

print(f"\n✅ Final Training Accuracy: {train_acc:.4f}")
print(f"✅ Final Validation Accuracy: {val_acc:.4f}")

print("\n📘 Training Classification Report:")
print(classification_report(y_train, y_train_pred))

print("\n📗 Validation Classification Report:")
print(classification_report(y_val, y_val_pred))


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor" } are not used.



[0]	train-mlogloss:2.25675	eval-mlogloss:2.25675
[100]	train-mlogloss:0.68242	eval-mlogloss:0.68245
[200]	train-mlogloss:0.30304	eval-mlogloss:0.30316
[300]	train-mlogloss:0.16634	eval-mlogloss:0.16654
[400]	train-mlogloss:0.11383	eval-mlogloss:0.11412
[500]	train-mlogloss:0.09264	eval-mlogloss:0.09303
[600]	train-mlogloss:0.08285	eval-mlogloss:0.08334
[700]	train-mlogloss:0.07807	eval-mlogloss:0.07863
[800]	train-mlogloss:0.07519	eval-mlogloss:0.07584
[900]	train-mlogloss:0.07308	eval-mlogloss:0.07382
[1000]	train-mlogloss:0.07154	eval-mlogloss:0.07236
[1100]	train-mlogloss:0.07032	eval-mlogloss:0.07122
[1200]	train-mlogloss:0.06921	eval-mlogloss:0.07020
[1300]	train-mlogloss:0.06816	eval-mlogloss:0.06924
[1400]	train-mlogloss:0.06721	eval-mlogloss:0.06839
[1500]	train-mlogloss:0.06631	eval-mlogloss:0.06759
[1600]	train-mlogloss:0.06544	eval-mlogloss:0.06683
[1700]	train-mlogloss:0.06463	eval-mlogloss:0.06614
[1800]	train-mlogloss:0.06390	eval-mlogloss:0.06553
[1900]	train-mlogloss:0.


    E.g. tree_method = "hist", device = "cuda"




✅ Final Training Accuracy: 0.9819
✅ Final Validation Accuracy: 0.9806

📘 Training Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      1.00   4879575
           1       1.00      1.00      1.00   3025135
           2       0.94      0.98      0.96   1964016
           3       0.99      0.99      0.99   1620987
           4       0.97      0.96      0.97    922658
           5       0.91      0.93      0.92    570087
           6       0.97      0.88      0.93    547572
           7       1.00      1.00      1.00     13447
           8       0.99      0.41      0.58      6179
           9       1.00      1.00      1.00      2740

    accuracy                           0.98  13552396
   macro avg       0.98      0.91      0.93  13552396
weighted avg       0.98      0.98      0.98  13552396


📗 Validation Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      1.00

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
import tensorflow_addons as tfa
from keras_tabnet.models import TabNetClassifier
import xgboost as xgb
from sklearn.ensemble import VotingClassifier
from sklearn.base import BaseEstimator, ClassifierMixin

data = pd.read_csv('/kaggle/input/afterlightgbm/lightGBMplustarget.csv')
data = data.replace([np.inf, -np.inf], np.nan).dropna()
for col in data.columns:
    data[col] = pd.to_numeric(data[col], errors='coerce')
data = data.dropna().clip(lower=-1e10, upper=1e10).reset_index(drop=True)

X = data.drop(columns=['Attack_Num']).astype(np.float32)
y = data['Attack_Num']
num_classes = len(np.unique(y))

X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42, stratify=y
)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.2, random_state=42, stratify=y_trainval
)

sm = SMOTE(random_state=42)
X_train_smote, y_train_smote = sm.fit_resample(X_train, y_train)


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_smote)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

y_train_cat = to_categorical(y_train_smote, num_classes)
y_val_cat = to_categorical(y_val, num_classes)
y_test_cat = to_categorical(y_test, num_classes)

def build_keras_model(input_dim, num_classes):
    model = keras.Sequential([
        keras.layers.Input(shape=(input_dim,)),
        keras.layers.Dense(1024, activation='swish'),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.4),
        keras.layers.Dense(512, activation='swish'),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.4),
        keras.layers.Dense(256, activation='swish'),
        keras.layers.BatchNormalization(),
        keras.layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(
        optimizer=keras.optimizers.Adam(0.0005),
        loss=tfa.losses.SigmoidFocalCrossEntropy(),
        metrics=['accuracy']
    )
    return model

keras_model = build_keras_model(X_train_scaled.shape[1], num_classes)
early_stop = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=15, restore_best_weights=True)
lr_schedule = keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=5, min_lr=1e-6)

keras_model.fit(
    X_train_scaled, y_train_cat,
    validation_data=(X_val_scaled, y_val_cat),
    epochs=100,
    batch_size=512,
    callbacks=[early_stop, lr_schedule],
    verbose=2
)

tabnet = TabNetClassifier(verbose=0)
tabnet.fit(
    X_train_smote.values, y_train_smote.values,
    eval_set=[(X_val.values, y_val.values)],
    patience=15,
    max_epochs=200
)

xgb_model = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=num_classes,
    tree_method='gpu_hist',
    predictor='gpu_predictor',
    learning_rate=0.01,
    max_depth=10,
    subsample=0.8,
    colsample_bytree=0.8,
    n_estimators=500,
    random_state=42
)
xgb_model.fit(X_train_smote, y_train_smote)

class KerasClassifierWrapper(BaseEstimator, ClassifierMixin):
    def __init__(self, model):
        self.model = model
    def fit(self, X, y): return self
    def predict(self, X):
        return np.argmax(self.model.predict(X), axis=1)

ensemble = VotingClassifier(estimators=[
    ('keras', KerasClassifierWrapper(keras_model)),
    ('tabnet', tabnet),
    ('xgb', xgb_model)
], voting='soft')

ensemble.fit(X_val_scaled, y_val) 

ensemble_preds = ensemble.predict(X_test_scaled)
print(f"\n✅ Ensemble Test Accuracy: {accuracy_score(y_test, ensemble_preds):.4f}")
print("\n📙 Ensemble Test Classification Report:")
print(classification_report(y_test, ensemble_preds))

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

data = pd.read_csv('/kaggle/input/afterlightgbm/lightGBMplustarget.csv')
data = data.replace([np.inf, -np.inf], np.nan).dropna()
for col in data.columns:
    data[col] = pd.to_numeric(data[col], errors='coerce')
data = data.dropna().clip(lower=-1e10, upper=1e10).reset_index(drop=True)

X = data.drop(columns=['Attack_Num']).astype(np.float32)
y = data['Attack_Num']

X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42, stratify=y
)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.2, random_state=42, stratify=y_trainval
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

num_classes = len(np.unique(y))
y_train_cat = keras.utils.to_categorical(y_train, num_classes)
y_val_cat = keras.utils.to_categorical(y_val, num_classes)
y_test_cat = keras.utils.to_categorical(y_test, num_classes)

def build_model(input_dim, num_classes):
    return keras.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(1024, activation='swish'),
        layers.BatchNormalization(),
        layers.Dropout(0.4),
        layers.Dense(512, activation='swish'),
        layers.BatchNormalization(),
        layers.Dropout(0.4),
        layers.Dense(256, activation='swish'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),
        layers.Dense(128, activation='swish'),
        layers.BatchNormalization(),
        layers.Dense(num_classes, activation='softmax')
    ])

model = build_model(X_train_scaled.shape[1], num_classes)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0005),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

early_stop = keras.callbacks.EarlyStopping(
    monitor='val_accuracy', patience=15, restore_best_weights=True, verbose=1
)
lr_schedule = keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', factor=0.5, patience=5, verbose=1, min_lr=1e-6
)

history = model.fit(
    X_train_scaled, y_train_cat,
    validation_data=(X_val_scaled, y_val_cat),
    epochs=100,
    batch_size=512,
    callbacks=[early_stop, lr_schedule],
    verbose=2
)

y_train_pred = np.argmax(model.predict(X_train_scaled), axis=1)
y_val_pred = np.argmax(model.predict(X_val_scaled), axis=1)
y_test_pred = np.argmax(model.predict(X_test_scaled), axis=1)

print(f"\n✅ Final Training Accuracy: {accuracy_score(y_train, y_train_pred):.4f}")
print(f"✅ Final Validation Accuracy: {accuracy_score(y_val, y_val_pred):.4f}")
print(f"✅ Final Test Accuracy: {accuracy_score(y_test, y_test_pred):.4f}")

print("\n📘 Training Classification Report:")
print(classification_report(y_train, y_train_pred))

print("\n📗 Validation Classification Report:")
print(classification_report(y_val, y_val_pred))

print("\n📙 Test Classification Report:")
print(classification_report(y_test, y_test_pred))

2025-04-29 18:14:15.962380: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745950456.162004     372 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745950456.216793     372 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
I0000 00:00:1745950546.534486     372 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1745950546.535144     372 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability:

Epoch 1/100


I0000 00:00:1745950557.254031     410 service.cc:148] XLA service 0x783cd4019a70 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1745950557.254650     410 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1745950557.254676     410 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1745950557.719196     410 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1745950561.204311     410 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


23823/23823 - 78s - 3ms/step - accuracy: 0.9372 - loss: 0.1812 - val_accuracy: 0.9584 - val_loss: 0.1233 - learning_rate: 5.0000e-04
Epoch 2/100
23823/23823 - 63s - 3ms/step - accuracy: 0.9557 - loss: 0.1306 - val_accuracy: 0.9300 - val_loss: 0.1730 - learning_rate: 5.0000e-04
Epoch 3/100
23823/23823 - 64s - 3ms/step - accuracy: 0.9573 - loss: 0.1258 - val_accuracy: 0.9623 - val_loss: 0.1127 - learning_rate: 5.0000e-04
Epoch 4/100
23823/23823 - 64s - 3ms/step - accuracy: 0.9593 - loss: 0.1201 - val_accuracy: 0.9628 - val_loss: 0.1112 - learning_rate: 5.0000e-04
Epoch 5/100
23823/23823 - 64s - 3ms/step - accuracy: 0.9602 - loss: 0.1172 - val_accuracy: 0.9643 - val_loss: 0.1060 - learning_rate: 5.0000e-04
Epoch 6/100
23823/23823 - 63s - 3ms/step - accuracy: 0.9609 - loss: 0.1148 - val_accuracy: 0.9648 - val_loss: 0.1042 - learning_rate: 5.0000e-04
Epoch 7/100
23823/23823 - 63s - 3ms/step - accuracy: 0.9616 - loss: 0.1128 - val_accuracy: 0.9650 - val_loss: 0.1031 - learning_rate: 5.0000e-

TypeError: object of type 'NoneType' has no len()

In [8]:
%%capture
pip install --upgrade numpy pandas imbalanced-learn xgboost keras-tabnet tensorflow-addons