In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from tensorflow import keras
from tensorflow.keras import layers
import xgboost as xgb

# --- Stage 1: load the data and prepare the IoT feature matrix -------------
# iot_data    : raw flow table; must contain an 'Attack' column (class name).
# target_data : table holding the final 'Attack_Num' target. Presumably the
#               output of an earlier LightGBM step (inferred from the file
#               name only -- confirm).
iot_data = pd.read_csv('/kaggle/input/shaon609/tondataset.csv')
target_data = pd.read_csv('/kaggle/input/afterlightgbm/lightGBMplustarget.csv')

# Remove the address columns and the 'Label' column; errors='ignore' makes
# this a no-op for any of them that is absent.
iot_data = iot_data.drop(columns=['IPV4_SRC_ADDR', 'IPV4_DST_ADDR', 'Label'], errors='ignore')

# Replace the 'Attack' values with integer class ids (0 .. n_classes-1).
le = LabelEncoder()
iot_data['Attack'] = le.fit_transform(iot_data['Attack'])

X_iot = iot_data.drop(columns=['Attack'])
y_iot = iot_data['Attack']

# Force every feature to numeric (unparseable values become NaN), treat
# +/-inf as missing, then drop every row that has a missing value.
X_iot = (
    X_iot.apply(pd.to_numeric, errors='coerce')
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)
# dropna() removed rows from X_iot only. Re-select the labels by the surviving
# row index so features and labels have the same length and order; otherwise
# train_test_split fails with inconsistent sample counts.
y_iot = y_iot.loc[X_iot.index]

# NOTE(review): the scaler is fit on all rows before the split, so test-row
# statistics influence the scaling. X_scaled (all rows) is also what the
# feature extractor consumes later in the script.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_iot)
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_iot, test_size=0.2, random_state=42, stratify=y_iot
)

# --- Stage 2: train a dense classifier and expose its last hidden layer ----
# Four ReLU layers of shrinking width feed a softmax output layer.
hidden_units = (128, 64, 32, 16)
n_features = X_train.shape[1]
# One output unit per class known to the label encoder, so every encoded
# label value has a matching softmax slot.
n_classes = len(le.classes_)

model = keras.Sequential(
    [layers.Input(shape=(n_features,))]
    + [layers.Dense(units, activation='relu') for units in hidden_units]
    + [layers.Dense(n_classes, activation='softmax')]
)
# Labels are integer class ids, hence the *sparse* categorical loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# 10% of the training split is held out by Keras for validation.
model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=1024,
    validation_split=0.1,
)

# The feature extractor shares the trained weights and stops at the layer
# just before the softmax (the 16-unit Dense layer).
last_hidden_layer = model.layers[-2]
extractor = keras.Model(inputs=model.input, outputs=last_hidden_layer.output)

# --- Stage 3: compress the learned features and attach them to the target --
# Run every cleaned row (train and test alike) through the extractor.
# NOTE(review): the extractor was trained with the class labels of the
# training split, so these features carry label information for those rows.
deep_features = extractor.predict(X_scaled, batch_size=1024)

# Reduce the hidden-layer activations to 4 principal components.
pca = PCA(n_components=4)
pca_features = pca.fit_transform(deep_features)

# X_scaled was built from X_iot, so the PCA output has exactly one row per
# row of X_iot. Label it with X_iot's index (the original row numbers that
# survived cleaning); iot_data.index has a different length as soon as any
# row was dropped and would raise a length-mismatch error.
pca_df = pd.DataFrame(
    pca_features,
    columns=[f'pca_feat_{i}' for i in range(1, pca_features.shape[1] + 1)],
    index=X_iot.index,
)

# Clean the target table but keep its original row index for now, then join
# on that index so each target row receives the PCA features of the same
# source row. Resetting both indices before concatenating (positional join)
# silently shifts rows whenever either table lost rows during cleaning.
# Assumes target_data rows are in the same order as the rows of the IoT CSV
# -- TODO confirm against the notebook that produced the target file.
target_data = target_data.replace([np.inf, -np.inf], np.nan).dropna()
target_data = pd.concat([target_data, pca_df], axis=1, join='inner').reset_index(drop=True)

# --- Stage 4: train and evaluate the final XGBoost classifier --------------
# Features are every column of target_data except the 'Attack_Num' target,
# including the PCA columns appended above.
X_final = target_data.drop(columns=['Attack_Num'])
y_final = target_data['Attack_Num']

X_train_final, X_val_final, y_train_final, y_val_final = train_test_split(
    X_final, y_final, test_size=0.2, random_state=42, stratify=y_final
)

# XGBoost 2.0 changed two things this cell relies on:
#   * fit() no longer accepts early_stopping_rounds (it raises TypeError);
#     the setting now belongs to the estimator constructor.
#   * tree_method='gpu_hist' / predictor='gpu_predictor' are deprecated in
#     favour of tree_method='hist' with device='cuda'.
# Older releases keep the original arguments unchanged.
xgb_version = tuple(int(part) for part in xgb.__version__.split('.')[:2])
if xgb_version >= (2, 0):
    version_params = {'tree_method': 'hist', 'device': 'cuda', 'early_stopping_rounds': 50}
    fit_params = {}
else:
    version_params = {'tree_method': 'gpu_hist', 'predictor': 'gpu_predictor'}
    fit_params = {'early_stopping_rounds': 50}

xgb_model = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=len(np.unique(y_final)),
    max_depth=10,
    learning_rate=0.01,
    subsample=0.8,
    colsample_bytree=0.8,
    eval_metric='mlogloss',
    n_estimators=1000,
    verbosity=1,
    **version_params,
)
# Training stops once validation mlogloss has not improved for 50 rounds;
# verbose=100 prints the metric every 100 boosting rounds.
xgb_model.fit(
    X_train_final,
    y_train_final,
    eval_set=[(X_val_final, y_val_final)],
    verbose=100,
    **fit_params,
)

# NOTE(review): the validation split doubles as the early-stopping set, so
# the scores below are optimistic estimates.
y_pred = xgb_model.predict(X_val_final)
print("\nFinal Accuracy:", accuracy_score(y_val_final, y_pred))
print("\nClassification Report:\n", classification_report(y_val_final, y_pred))